Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/energy/train/config.json --data_path /home/saxelrod/synthetic/energy/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/energy/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/energy/train_full.csv',
 'dataset_type': 'regression',
 'depth': 6,
 'device': device(type='cuda', index=1),
 'dropout': 0.2,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 300,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 300,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/energy/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/energy/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/energy/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['ensembleenergy'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': False,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 9.5998e-01, PNorm = 38.1707, GNorm = 3.2195, lr_0 = 1.0413e-04
Loss = 1.0426e+00, PNorm = 38.1706, GNorm = 3.0344, lr_0 = 1.0788e-04
Loss = 1.0008e+00, PNorm = 38.1708, GNorm = 2.6693, lr_0 = 1.1163e-04
Loss = 9.0946e-01, PNorm = 38.1711, GNorm = 2.9240, lr_0 = 1.1537e-04
Loss = 9.5634e-01, PNorm = 38.1717, GNorm = 2.4156, lr_0 = 1.1913e-04
Loss = 9.2349e-01, PNorm = 38.1726, GNorm = 3.5884, lr_0 = 1.2287e-04
Loss = 8.6422e-01, PNorm = 38.1733, GNorm = 3.2229, lr_0 = 1.2663e-04
Loss = 8.1328e-01, PNorm = 38.1739, GNorm = 2.1394, lr_0 = 1.3038e-04
Loss = 9.4528e-01, PNorm = 38.1749, GNorm = 4.1353, lr_0 = 1.3413e-04
Loss = 9.0497e-01, PNorm = 38.1759, GNorm = 2.0742, lr_0 = 1.3788e-04
Loss = 8.8005e-01, PNorm = 38.1775, GNorm = 2.2375, lr_0 = 1.4163e-04
Loss = 9.4699e-01, PNorm = 38.1791, GNorm = 3.5309, lr_0 = 1.4537e-04
Loss = 8.0308e-01, PNorm = 38.1809, GNorm = 3.4202, lr_0 = 1.4913e-04
Loss = 8.3097e-01, PNorm = 38.1824, GNorm = 2.6078, lr_0 = 1.5288e-04
Loss = 8.5786e-01, PNorm = 38.1840, GNorm = 4.3443, lr_0 = 1.5662e-04
Loss = 8.3580e-01, PNorm = 38.1865, GNorm = 5.2503, lr_0 = 1.6038e-04
Loss = 7.5455e-01, PNorm = 38.1889, GNorm = 1.9202, lr_0 = 1.6412e-04
Loss = 8.5873e-01, PNorm = 38.1925, GNorm = 6.2772, lr_0 = 1.6788e-04
Loss = 7.2688e-01, PNorm = 38.1958, GNorm = 3.1010, lr_0 = 1.7163e-04
Loss = 8.5516e-01, PNorm = 38.1982, GNorm = 3.3101, lr_0 = 1.7538e-04
Loss = 7.5530e-01, PNorm = 38.2006, GNorm = 6.5629, lr_0 = 1.7913e-04
Loss = 7.8574e-01, PNorm = 38.2029, GNorm = 6.0220, lr_0 = 1.8288e-04
Loss = 7.9844e-01, PNorm = 38.2057, GNorm = 4.5256, lr_0 = 1.8662e-04
Loss = 8.0984e-01, PNorm = 38.2082, GNorm = 6.4922, lr_0 = 1.9038e-04
Loss = 7.7023e-01, PNorm = 38.2107, GNorm = 2.4091, lr_0 = 1.9413e-04
Loss = 8.0173e-01, PNorm = 38.2136, GNorm = 4.2448, lr_0 = 1.9788e-04
Loss = 9.6016e-01, PNorm = 38.2169, GNorm = 6.1871, lr_0 = 2.0163e-04
Loss = 7.7196e-01, PNorm = 38.2187, GNorm = 5.8581, lr_0 = 2.0537e-04
Loss = 7.2919e-01, PNorm = 38.2216, GNorm = 2.2895, lr_0 = 2.0913e-04
Loss = 6.1673e-01, PNorm = 38.2265, GNorm = 7.8531, lr_0 = 2.1288e-04
Loss = 6.8546e-01, PNorm = 38.2291, GNorm = 2.9406, lr_0 = 2.1663e-04
Loss = 7.3635e-01, PNorm = 38.2310, GNorm = 7.2832, lr_0 = 2.2038e-04
Loss = 8.1885e-01, PNorm = 38.2322, GNorm = 1.9583, lr_0 = 2.2412e-04
Loss = 7.2744e-01, PNorm = 38.2337, GNorm = 9.3667, lr_0 = 2.2787e-04
Loss = 7.9003e-01, PNorm = 38.2365, GNorm = 2.2311, lr_0 = 2.3163e-04
Loss = 6.9193e-01, PNorm = 38.2418, GNorm = 2.7081, lr_0 = 2.3538e-04
Loss = 6.7221e-01, PNorm = 38.2451, GNorm = 2.3197, lr_0 = 2.3913e-04
Loss = 6.9061e-01, PNorm = 38.2491, GNorm = 1.4795, lr_0 = 2.4288e-04
Loss = 7.5157e-01, PNorm = 38.2522, GNorm = 1.6176, lr_0 = 2.4662e-04
Loss = 7.9080e-01, PNorm = 38.2546, GNorm = 7.1267, lr_0 = 2.5038e-04
Loss = 7.7441e-01, PNorm = 38.2564, GNorm = 1.7086, lr_0 = 2.5413e-04
Loss = 8.6999e-01, PNorm = 38.2597, GNorm = 3.3749, lr_0 = 2.5788e-04
Loss = 6.8979e-01, PNorm = 38.2631, GNorm = 1.8413, lr_0 = 2.6163e-04
Loss = 6.9220e-01, PNorm = 38.2676, GNorm = 1.2441, lr_0 = 2.6537e-04
Loss = 7.0121e-01, PNorm = 38.2718, GNorm = 5.0246, lr_0 = 2.6912e-04
Loss = 7.0350e-01, PNorm = 38.2749, GNorm = 4.2852, lr_0 = 2.7288e-04
Loss = 7.2668e-01, PNorm = 38.2784, GNorm = 5.1536, lr_0 = 2.7663e-04
Loss = 6.7395e-01, PNorm = 38.2850, GNorm = 3.3383, lr_0 = 2.8038e-04
Loss = 7.3960e-01, PNorm = 38.2870, GNorm = 5.8026, lr_0 = 2.8413e-04
Loss = 6.6724e-01, PNorm = 38.2927, GNorm = 2.5798, lr_0 = 2.8787e-04
Loss = 7.8379e-01, PNorm = 38.2965, GNorm = 4.2344, lr_0 = 2.9163e-04
Loss = 7.2451e-01, PNorm = 38.2993, GNorm = 6.5695, lr_0 = 2.9538e-04
Loss = 6.9659e-01, PNorm = 38.3033, GNorm = 1.6540, lr_0 = 2.9913e-04
Loss = 7.1807e-01, PNorm = 38.3075, GNorm = 2.0296, lr_0 = 3.0288e-04
Loss = 7.0582e-01, PNorm = 38.3137, GNorm = 2.7803, lr_0 = 3.0662e-04
Loss = 6.6618e-01, PNorm = 38.3179, GNorm = 1.3841, lr_0 = 3.1037e-04
Loss = 6.7029e-01, PNorm = 38.3211, GNorm = 4.6677, lr_0 = 3.1413e-04
Loss = 7.6537e-01, PNorm = 38.3273, GNorm = 3.6097, lr_0 = 3.1788e-04
Loss = 7.3736e-01, PNorm = 38.3289, GNorm = 3.1218, lr_0 = 3.2163e-04
Loss = 6.7614e-01, PNorm = 38.3326, GNorm = 7.3285, lr_0 = 3.2538e-04
Loss = 6.5807e-01, PNorm = 38.3369, GNorm = 2.4093, lr_0 = 3.2912e-04
Loss = 6.6720e-01, PNorm = 38.3440, GNorm = 1.7790, lr_0 = 3.3288e-04
Loss = 6.7191e-01, PNorm = 38.3476, GNorm = 1.3251, lr_0 = 3.3663e-04
Loss = 7.7017e-01, PNorm = 38.3531, GNorm = 5.5913, lr_0 = 3.4038e-04
Loss = 7.1001e-01, PNorm = 38.3562, GNorm = 9.5837, lr_0 = 3.4413e-04
Loss = 7.2630e-01, PNorm = 38.3593, GNorm = 2.3540, lr_0 = 3.4787e-04
Loss = 7.2851e-01, PNorm = 38.3662, GNorm = 1.4634, lr_0 = 3.5162e-04
Loss = 5.8616e-01, PNorm = 38.3718, GNorm = 1.3700, lr_0 = 3.5538e-04
Loss = 7.8308e-01, PNorm = 38.3758, GNorm = 4.9669, lr_0 = 3.5913e-04
Loss = 7.3922e-01, PNorm = 38.3811, GNorm = 2.1380, lr_0 = 3.6288e-04
Loss = 6.1934e-01, PNorm = 38.3863, GNorm = 10.6555, lr_0 = 3.6662e-04
Loss = 6.7439e-01, PNorm = 38.3912, GNorm = 2.1394, lr_0 = 3.7037e-04
Loss = 6.9233e-01, PNorm = 38.3932, GNorm = 3.4800, lr_0 = 3.7413e-04
Loss = 6.5531e-01, PNorm = 38.3990, GNorm = 1.5733, lr_0 = 3.7788e-04
Loss = 6.4267e-01, PNorm = 38.4061, GNorm = 4.8674, lr_0 = 3.8163e-04
Loss = 6.2315e-01, PNorm = 38.4108, GNorm = 1.3543, lr_0 = 3.8537e-04
Loss = 6.8193e-01, PNorm = 38.4204, GNorm = 5.3771, lr_0 = 3.8912e-04
Loss = 7.1347e-01, PNorm = 38.4281, GNorm = 2.3431, lr_0 = 3.9287e-04
Loss = 7.0052e-01, PNorm = 38.4328, GNorm = 1.9448, lr_0 = 3.9663e-04
Loss = 8.1275e-01, PNorm = 38.4427, GNorm = 1.7365, lr_0 = 4.0038e-04
Loss = 6.6267e-01, PNorm = 38.4523, GNorm = 1.3738, lr_0 = 4.0413e-04
Loss = 6.4380e-01, PNorm = 38.4581, GNorm = 1.1749, lr_0 = 4.0787e-04
Loss = 6.1990e-01, PNorm = 38.4649, GNorm = 1.3844, lr_0 = 4.1162e-04
Loss = 6.0873e-01, PNorm = 38.4654, GNorm = 2.1865, lr_0 = 4.1537e-04
Loss = 6.5072e-01, PNorm = 38.4749, GNorm = 3.1960, lr_0 = 4.1913e-04
Loss = 5.8684e-01, PNorm = 38.4848, GNorm = 3.3147, lr_0 = 4.2288e-04
Loss = 6.3504e-01, PNorm = 38.4910, GNorm = 1.5542, lr_0 = 4.2662e-04
Loss = 7.6547e-01, PNorm = 38.4966, GNorm = 2.1637, lr_0 = 4.3037e-04
Loss = 6.5149e-01, PNorm = 38.5035, GNorm = 3.9970, lr_0 = 4.3412e-04
Loss = 5.8485e-01, PNorm = 38.5070, GNorm = 2.8656, lr_0 = 4.3788e-04
Loss = 6.3435e-01, PNorm = 38.5152, GNorm = 3.7479, lr_0 = 4.4163e-04
Loss = 6.2865e-01, PNorm = 38.5208, GNorm = 1.7859, lr_0 = 4.4538e-04
Loss = 6.5564e-01, PNorm = 38.5266, GNorm = 2.9130, lr_0 = 4.4912e-04
Loss = 5.8830e-01, PNorm = 38.5325, GNorm = 8.6861, lr_0 = 4.5287e-04
Loss = 6.3474e-01, PNorm = 38.5408, GNorm = 2.3278, lr_0 = 4.5662e-04
Loss = 7.5017e-01, PNorm = 38.5490, GNorm = 7.7117, lr_0 = 4.6038e-04
Loss = 6.9622e-01, PNorm = 38.5602, GNorm = 1.9518, lr_0 = 4.6413e-04
Loss = 6.6987e-01, PNorm = 38.5675, GNorm = 6.1104, lr_0 = 4.6787e-04
Loss = 6.8925e-01, PNorm = 38.5761, GNorm = 2.2703, lr_0 = 4.7162e-04
Loss = 5.8371e-01, PNorm = 38.5876, GNorm = 1.2886, lr_0 = 4.7537e-04
Loss = 6.5776e-01, PNorm = 38.5935, GNorm = 4.2271, lr_0 = 4.7913e-04
Loss = 6.8908e-01, PNorm = 38.5988, GNorm = 3.7292, lr_0 = 4.8288e-04
Loss = 5.6759e-01, PNorm = 38.6070, GNorm = 1.4278, lr_0 = 4.8663e-04
Loss = 5.8247e-01, PNorm = 38.6123, GNorm = 1.5586, lr_0 = 4.9038e-04
Loss = 6.8899e-01, PNorm = 38.6217, GNorm = 2.2406, lr_0 = 4.9412e-04
Loss = 5.7604e-01, PNorm = 38.6307, GNorm = 2.1888, lr_0 = 4.9788e-04
Loss = 6.4071e-01, PNorm = 38.6377, GNorm = 1.3548, lr_0 = 5.0163e-04
Loss = 6.2413e-01, PNorm = 38.6440, GNorm = 1.6620, lr_0 = 5.0538e-04
Loss = 6.3042e-01, PNorm = 38.6508, GNorm = 5.5451, lr_0 = 5.0913e-04
Loss = 6.5315e-01, PNorm = 38.6656, GNorm = 1.7228, lr_0 = 5.1287e-04
Loss = 7.3293e-01, PNorm = 38.6705, GNorm = 1.6406, lr_0 = 5.1663e-04
Loss = 6.7326e-01, PNorm = 38.6843, GNorm = 1.9464, lr_0 = 5.2038e-04
Loss = 6.1264e-01, PNorm = 38.6927, GNorm = 5.4242, lr_0 = 5.2413e-04
Loss = 6.9003e-01, PNorm = 38.6998, GNorm = 1.1246, lr_0 = 5.2788e-04
Loss = 6.0005e-01, PNorm = 38.7114, GNorm = 2.4217, lr_0 = 5.3162e-04
Loss = 6.2616e-01, PNorm = 38.7182, GNorm = 1.7324, lr_0 = 5.3538e-04
Loss = 7.1121e-01, PNorm = 38.7267, GNorm = 2.8220, lr_0 = 5.3912e-04
Loss = 5.8963e-01, PNorm = 38.7365, GNorm = 2.9770, lr_0 = 5.4288e-04
Loss = 5.5184e-01, PNorm = 38.7422, GNorm = 1.4951, lr_0 = 5.4663e-04
Loss = 5.5518e-01, PNorm = 38.7559, GNorm = 1.1499, lr_0 = 5.5038e-04
Validation mae = 0.135526
Epoch 1
Loss = 6.1135e-01, PNorm = 38.7653, GNorm = 4.2480, lr_0 = 5.5413e-04
Loss = 6.3333e-01, PNorm = 38.7754, GNorm = 5.7473, lr_0 = 5.5787e-04
Loss = 5.9002e-01, PNorm = 38.7805, GNorm = 1.3791, lr_0 = 5.6163e-04
Loss = 6.9074e-01, PNorm = 38.7905, GNorm = 1.3026, lr_0 = 5.6538e-04
Loss = 6.2701e-01, PNorm = 38.8038, GNorm = 1.4553, lr_0 = 5.6913e-04
Loss = 6.0276e-01, PNorm = 38.8098, GNorm = 0.9901, lr_0 = 5.7288e-04
Loss = 5.0144e-01, PNorm = 38.8176, GNorm = 1.1546, lr_0 = 5.7662e-04
Loss = 6.1790e-01, PNorm = 38.8310, GNorm = 3.6973, lr_0 = 5.8038e-04
Loss = 6.0167e-01, PNorm = 38.8410, GNorm = 1.2975, lr_0 = 5.8413e-04
Loss = 5.5844e-01, PNorm = 38.8515, GNorm = 1.7728, lr_0 = 5.8788e-04
Loss = 5.7980e-01, PNorm = 38.8611, GNorm = 1.6079, lr_0 = 5.9163e-04
Loss = 5.4495e-01, PNorm = 38.8772, GNorm = 3.6282, lr_0 = 5.9538e-04
Loss = 6.6598e-01, PNorm = 38.8925, GNorm = 2.6232, lr_0 = 5.9913e-04
Loss = 5.8894e-01, PNorm = 38.9043, GNorm = 4.5120, lr_0 = 6.0288e-04
Loss = 4.7763e-01, PNorm = 38.9188, GNorm = 1.4996, lr_0 = 6.0663e-04
Loss = 5.1736e-01, PNorm = 38.9310, GNorm = 2.5066, lr_0 = 6.1038e-04
Loss = 6.7530e-01, PNorm = 38.9374, GNorm = 6.8791, lr_0 = 6.1413e-04
Loss = 6.3300e-01, PNorm = 38.9473, GNorm = 5.7767, lr_0 = 6.1788e-04
Loss = 5.5786e-01, PNorm = 38.9583, GNorm = 3.4795, lr_0 = 6.2163e-04
Loss = 6.5564e-01, PNorm = 38.9744, GNorm = 1.5701, lr_0 = 6.2538e-04
Loss = 5.6971e-01, PNorm = 38.9902, GNorm = 1.4991, lr_0 = 6.2913e-04
Loss = 6.7851e-01, PNorm = 39.0016, GNorm = 3.2387, lr_0 = 6.3288e-04
Loss = 5.9766e-01, PNorm = 39.0077, GNorm = 2.3328, lr_0 = 6.3663e-04
Loss = 6.5923e-01, PNorm = 39.0234, GNorm = 5.8557, lr_0 = 6.4038e-04
Loss = 5.8723e-01, PNorm = 39.0362, GNorm = 1.3953, lr_0 = 6.4413e-04
Loss = 6.1849e-01, PNorm = 39.0467, GNorm = 1.8598, lr_0 = 6.4788e-04
Loss = 6.2363e-01, PNorm = 39.0569, GNorm = 4.5602, lr_0 = 6.5163e-04
Loss = 6.4515e-01, PNorm = 39.0720, GNorm = 2.3712, lr_0 = 6.5538e-04
Loss = 5.7736e-01, PNorm = 39.0876, GNorm = 2.6663, lr_0 = 6.5913e-04
Loss = 5.8401e-01, PNorm = 39.0994, GNorm = 2.5194, lr_0 = 6.6288e-04
Loss = 6.6424e-01, PNorm = 39.1103, GNorm = 4.5985, lr_0 = 6.6663e-04
Loss = 5.2772e-01, PNorm = 39.1202, GNorm = 3.8357, lr_0 = 6.7038e-04
Loss = 5.7772e-01, PNorm = 39.1325, GNorm = 1.3325, lr_0 = 6.7413e-04
Loss = 6.1574e-01, PNorm = 39.1510, GNorm = 6.4179, lr_0 = 6.7788e-04
Loss = 5.1791e-01, PNorm = 39.1665, GNorm = 5.2509, lr_0 = 6.8163e-04
Loss = 6.1009e-01, PNorm = 39.1783, GNorm = 1.5431, lr_0 = 6.8538e-04
Loss = 5.7941e-01, PNorm = 39.1971, GNorm = 2.3389, lr_0 = 6.8913e-04
Loss = 6.2247e-01, PNorm = 39.2076, GNorm = 4.5842, lr_0 = 6.9288e-04
Loss = 6.1218e-01, PNorm = 39.2289, GNorm = 4.1261, lr_0 = 6.9663e-04
Loss = 6.3929e-01, PNorm = 39.2509, GNorm = 8.9683, lr_0 = 7.0038e-04
Loss = 6.7977e-01, PNorm = 39.2679, GNorm = 2.6673, lr_0 = 7.0413e-04
Loss = 6.8131e-01, PNorm = 39.2814, GNorm = 3.5793, lr_0 = 7.0788e-04
Loss = 6.3827e-01, PNorm = 39.2967, GNorm = 3.2542, lr_0 = 7.1163e-04
Loss = 6.5338e-01, PNorm = 39.3126, GNorm = 1.1545, lr_0 = 7.1538e-04
Loss = 5.9172e-01, PNorm = 39.3302, GNorm = 1.0929, lr_0 = 7.1913e-04
Loss = 6.4262e-01, PNorm = 39.3512, GNorm = 1.5674, lr_0 = 7.2288e-04
Loss = 5.3278e-01, PNorm = 39.3718, GNorm = 2.2942, lr_0 = 7.2663e-04
Loss = 5.9442e-01, PNorm = 39.3854, GNorm = 1.7192, lr_0 = 7.3038e-04
Loss = 5.4210e-01, PNorm = 39.3951, GNorm = 1.4512, lr_0 = 7.3413e-04
Loss = 5.6699e-01, PNorm = 39.4087, GNorm = 2.7324, lr_0 = 7.3788e-04
Loss = 6.2326e-01, PNorm = 39.4224, GNorm = 1.5180, lr_0 = 7.4163e-04
Loss = 6.4883e-01, PNorm = 39.4279, GNorm = 3.1959, lr_0 = 7.4538e-04
Loss = 6.1046e-01, PNorm = 39.4351, GNorm = 1.3342, lr_0 = 7.4913e-04
Loss = 5.1999e-01, PNorm = 39.4525, GNorm = 3.5767, lr_0 = 7.5288e-04
Loss = 5.9970e-01, PNorm = 39.4743, GNorm = 3.2683, lr_0 = 7.5663e-04
Loss = 6.2217e-01, PNorm = 39.4944, GNorm = 2.1820, lr_0 = 7.6038e-04
Loss = 6.0431e-01, PNorm = 39.5040, GNorm = 1.7646, lr_0 = 7.6413e-04
Loss = 5.9519e-01, PNorm = 39.5210, GNorm = 2.8518, lr_0 = 7.6788e-04
Loss = 5.4766e-01, PNorm = 39.5403, GNorm = 3.4261, lr_0 = 7.7163e-04
Loss = 6.3050e-01, PNorm = 39.5568, GNorm = 4.7007, lr_0 = 7.7538e-04
Loss = 5.4106e-01, PNorm = 39.5692, GNorm = 4.9319, lr_0 = 7.7913e-04
Loss = 5.9494e-01, PNorm = 39.5817, GNorm = 2.4433, lr_0 = 7.8288e-04
Loss = 6.3980e-01, PNorm = 39.5943, GNorm = 2.0467, lr_0 = 7.8663e-04
Loss = 5.1978e-01, PNorm = 39.6166, GNorm = 2.8513, lr_0 = 7.9038e-04
Loss = 5.8668e-01, PNorm = 39.6330, GNorm = 1.5330, lr_0 = 7.9413e-04
Loss = 6.0397e-01, PNorm = 39.6508, GNorm = 1.6139, lr_0 = 7.9788e-04
Loss = 5.2097e-01, PNorm = 39.6657, GNorm = 1.2521, lr_0 = 8.0163e-04
Loss = 5.6247e-01, PNorm = 39.6733, GNorm = 2.0868, lr_0 = 8.0538e-04
Loss = 5.5564e-01, PNorm = 39.6810, GNorm = 2.1995, lr_0 = 8.0913e-04
Loss = 5.5035e-01, PNorm = 39.7022, GNorm = 3.0820, lr_0 = 8.1288e-04
Loss = 5.5551e-01, PNorm = 39.7090, GNorm = 2.0580, lr_0 = 8.1663e-04
Loss = 5.7833e-01, PNorm = 39.7268, GNorm = 1.9226, lr_0 = 8.2038e-04
Loss = 6.0496e-01, PNorm = 39.7404, GNorm = 2.0988, lr_0 = 8.2413e-04
Loss = 6.7690e-01, PNorm = 39.7651, GNorm = 5.2898, lr_0 = 8.2788e-04
Loss = 4.9942e-01, PNorm = 39.7832, GNorm = 2.0776, lr_0 = 8.3163e-04
Loss = 5.7592e-01, PNorm = 39.7949, GNorm = 2.1491, lr_0 = 8.3538e-04
Loss = 5.4962e-01, PNorm = 39.8165, GNorm = 5.4350, lr_0 = 8.3913e-04
Loss = 5.0512e-01, PNorm = 39.8303, GNorm = 1.3315, lr_0 = 8.4288e-04
Loss = 5.8173e-01, PNorm = 39.8493, GNorm = 5.3456, lr_0 = 8.4663e-04
Loss = 5.6592e-01, PNorm = 39.8669, GNorm = 2.3252, lr_0 = 8.5038e-04
Loss = 6.0605e-01, PNorm = 39.8866, GNorm = 2.0141, lr_0 = 8.5413e-04
Loss = 5.2788e-01, PNorm = 39.9062, GNorm = 1.4606, lr_0 = 8.5788e-04
Loss = 5.8654e-01, PNorm = 39.9270, GNorm = 1.3328, lr_0 = 8.6163e-04
Loss = 5.5761e-01, PNorm = 39.9411, GNorm = 4.6482, lr_0 = 8.6538e-04
Loss = 6.0826e-01, PNorm = 39.9619, GNorm = 2.1619, lr_0 = 8.6913e-04
Loss = 6.0753e-01, PNorm = 39.9773, GNorm = 3.1982, lr_0 = 8.7288e-04
Loss = 6.0105e-01, PNorm = 39.9929, GNorm = 4.0377, lr_0 = 8.7663e-04
Loss = 6.0566e-01, PNorm = 40.0197, GNorm = 2.7368, lr_0 = 8.8038e-04
Loss = 5.9151e-01, PNorm = 40.0357, GNorm = 2.5757, lr_0 = 8.8413e-04
Loss = 5.9254e-01, PNorm = 40.0467, GNorm = 2.6053, lr_0 = 8.8788e-04
Loss = 5.5574e-01, PNorm = 40.0577, GNorm = 2.8059, lr_0 = 8.9163e-04
Loss = 6.7800e-01, PNorm = 40.0732, GNorm = 6.2042, lr_0 = 8.9538e-04
Loss = 6.9630e-01, PNorm = 40.0872, GNorm = 2.0263, lr_0 = 8.9913e-04
Loss = 6.4827e-01, PNorm = 40.1054, GNorm = 3.2363, lr_0 = 9.0288e-04
Loss = 7.0145e-01, PNorm = 40.1255, GNorm = 1.6380, lr_0 = 9.0663e-04
Loss = 6.5154e-01, PNorm = 40.1499, GNorm = 3.2395, lr_0 = 9.1038e-04
Loss = 5.8298e-01, PNorm = 40.1681, GNorm = 1.3905, lr_0 = 9.1413e-04
Loss = 5.5811e-01, PNorm = 40.2002, GNorm = 2.2940, lr_0 = 9.1788e-04
Loss = 5.4650e-01, PNorm = 40.2253, GNorm = 2.8129, lr_0 = 9.2163e-04
Loss = 5.5585e-01, PNorm = 40.2448, GNorm = 1.0220, lr_0 = 9.2538e-04
Loss = 6.2820e-01, PNorm = 40.2703, GNorm = 6.2748, lr_0 = 9.2913e-04
Loss = 5.8241e-01, PNorm = 40.3020, GNorm = 2.9724, lr_0 = 9.3288e-04
Loss = 6.1683e-01, PNorm = 40.3206, GNorm = 1.0846, lr_0 = 9.3663e-04
Loss = 6.0419e-01, PNorm = 40.3375, GNorm = 2.3570, lr_0 = 9.4038e-04
Loss = 5.3684e-01, PNorm = 40.3539, GNorm = 2.7730, lr_0 = 9.4413e-04
Loss = 5.9785e-01, PNorm = 40.3694, GNorm = 1.2576, lr_0 = 9.4788e-04
Loss = 5.6205e-01, PNorm = 40.3826, GNorm = 1.5514, lr_0 = 9.5163e-04
Loss = 6.0289e-01, PNorm = 40.3987, GNorm = 1.2456, lr_0 = 9.5538e-04
Loss = 5.7336e-01, PNorm = 40.4096, GNorm = 1.4704, lr_0 = 9.5913e-04
Loss = 4.7780e-01, PNorm = 40.4252, GNorm = 3.7026, lr_0 = 9.6288e-04
Loss = 5.9256e-01, PNorm = 40.4361, GNorm = 4.8728, lr_0 = 9.6663e-04
Loss = 6.6552e-01, PNorm = 40.4727, GNorm = 1.4941, lr_0 = 9.7038e-04
Loss = 5.7977e-01, PNorm = 40.4958, GNorm = 2.3456, lr_0 = 9.7413e-04
Loss = 5.7350e-01, PNorm = 40.5175, GNorm = 1.7381, lr_0 = 9.7788e-04
Loss = 5.8534e-01, PNorm = 40.5446, GNorm = 1.5206, lr_0 = 9.8163e-04
Loss = 5.5992e-01, PNorm = 40.5686, GNorm = 3.2355, lr_0 = 9.8537e-04
Loss = 5.5471e-01, PNorm = 40.5889, GNorm = 1.7191, lr_0 = 9.8912e-04
Loss = 5.7798e-01, PNorm = 40.6206, GNorm = 2.3809, lr_0 = 9.9288e-04
Loss = 5.8125e-01, PNorm = 40.6428, GNorm = 2.0406, lr_0 = 9.9663e-04
Loss = 5.3051e-01, PNorm = 40.6638, GNorm = 1.1699, lr_0 = 9.9993e-04
Validation mae = 0.131417
Epoch 2
Loss = 5.8220e-01, PNorm = 40.6862, GNorm = 2.5285, lr_0 = 9.9925e-04
Loss = 5.9140e-01, PNorm = 40.7049, GNorm = 1.2160, lr_0 = 9.9856e-04
Loss = 5.4007e-01, PNorm = 40.7225, GNorm = 1.7621, lr_0 = 9.9788e-04
Loss = 5.4891e-01, PNorm = 40.7429, GNorm = 1.9543, lr_0 = 9.9719e-04
Loss = 5.5992e-01, PNorm = 40.7651, GNorm = 2.1934, lr_0 = 9.9651e-04
Loss = 5.7653e-01, PNorm = 40.7838, GNorm = 2.4195, lr_0 = 9.9583e-04
Loss = 5.4646e-01, PNorm = 40.8153, GNorm = 1.2527, lr_0 = 9.9515e-04
Loss = 5.5532e-01, PNorm = 40.8353, GNorm = 2.4888, lr_0 = 9.9446e-04
Loss = 5.3505e-01, PNorm = 40.8484, GNorm = 1.1320, lr_0 = 9.9378e-04
Loss = 4.8512e-01, PNorm = 40.8675, GNorm = 2.1127, lr_0 = 9.9310e-04
Loss = 6.3102e-01, PNorm = 40.9022, GNorm = 1.3964, lr_0 = 9.9242e-04
Loss = 6.1801e-01, PNorm = 40.9195, GNorm = 2.5174, lr_0 = 9.9174e-04
Loss = 5.3474e-01, PNorm = 40.9490, GNorm = 4.8057, lr_0 = 9.9106e-04
Loss = 5.4491e-01, PNorm = 40.9742, GNorm = 2.9217, lr_0 = 9.9038e-04
Loss = 5.5206e-01, PNorm = 40.9968, GNorm = 1.2121, lr_0 = 9.8971e-04
Loss = 6.7229e-01, PNorm = 41.0190, GNorm = 4.4446, lr_0 = 9.8903e-04
Loss = 5.7856e-01, PNorm = 41.0394, GNorm = 2.0793, lr_0 = 9.8835e-04
Loss = 5.3449e-01, PNorm = 41.0645, GNorm = 2.5158, lr_0 = 9.8767e-04
Loss = 5.4690e-01, PNorm = 41.0780, GNorm = 3.5857, lr_0 = 9.8700e-04
Loss = 5.3789e-01, PNorm = 41.1017, GNorm = 0.8240, lr_0 = 9.8632e-04
Loss = 5.3263e-01, PNorm = 41.1232, GNorm = 1.9142, lr_0 = 9.8564e-04
Loss = 5.2426e-01, PNorm = 41.1415, GNorm = 5.5958, lr_0 = 9.8497e-04
Loss = 5.3461e-01, PNorm = 41.1541, GNorm = 4.7107, lr_0 = 9.8429e-04
Loss = 5.8198e-01, PNorm = 41.1783, GNorm = 1.0829, lr_0 = 9.8362e-04
Loss = 6.4324e-01, PNorm = 41.2058, GNorm = 1.9284, lr_0 = 9.8295e-04
Loss = 5.7008e-01, PNorm = 41.2189, GNorm = 1.3435, lr_0 = 9.8227e-04
Loss = 5.8183e-01, PNorm = 41.2456, GNorm = 1.5550, lr_0 = 9.8160e-04
Loss = 4.8584e-01, PNorm = 41.2650, GNorm = 1.3252, lr_0 = 9.8093e-04
Loss = 5.2284e-01, PNorm = 41.2849, GNorm = 1.5793, lr_0 = 9.8026e-04
Loss = 5.8426e-01, PNorm = 41.2969, GNorm = 1.4802, lr_0 = 9.7958e-04
Loss = 5.7310e-01, PNorm = 41.3184, GNorm = 1.4498, lr_0 = 9.7891e-04
Loss = 5.3516e-01, PNorm = 41.3396, GNorm = 1.2869, lr_0 = 9.7824e-04
Loss = 5.3823e-01, PNorm = 41.3621, GNorm = 0.9241, lr_0 = 9.7757e-04
Loss = 4.9619e-01, PNorm = 41.3872, GNorm = 2.1436, lr_0 = 9.7690e-04
Loss = 5.1511e-01, PNorm = 41.4024, GNorm = 0.8859, lr_0 = 9.7623e-04
Loss = 5.7210e-01, PNorm = 41.4312, GNorm = 2.2976, lr_0 = 9.7556e-04
Loss = 4.8391e-01, PNorm = 41.4554, GNorm = 1.1309, lr_0 = 9.7490e-04
Loss = 5.4515e-01, PNorm = 41.4741, GNorm = 1.2282, lr_0 = 9.7423e-04
Loss = 5.2374e-01, PNorm = 41.5032, GNorm = 1.2967, lr_0 = 9.7356e-04
Loss = 5.3998e-01, PNorm = 41.5299, GNorm = 4.6524, lr_0 = 9.7289e-04
Loss = 5.4130e-01, PNorm = 41.5473, GNorm = 2.6710, lr_0 = 9.7223e-04
Loss = 4.7768e-01, PNorm = 41.5675, GNorm = 1.5839, lr_0 = 9.7156e-04
Loss = 4.7423e-01, PNorm = 41.5900, GNorm = 1.0302, lr_0 = 9.7090e-04
Loss = 4.4852e-01, PNorm = 41.6056, GNorm = 1.2454, lr_0 = 9.7023e-04
Loss = 6.0931e-01, PNorm = 41.6211, GNorm = 1.7655, lr_0 = 9.6957e-04
Loss = 5.3633e-01, PNorm = 41.6419, GNorm = 1.4334, lr_0 = 9.6890e-04
Loss = 5.9466e-01, PNorm = 41.6590, GNorm = 1.6642, lr_0 = 9.6824e-04
Loss = 4.6972e-01, PNorm = 41.6763, GNorm = 1.3869, lr_0 = 9.6757e-04
Loss = 5.0983e-01, PNorm = 41.7038, GNorm = 3.0531, lr_0 = 9.6691e-04
Loss = 6.1041e-01, PNorm = 41.7218, GNorm = 1.0137, lr_0 = 9.6625e-04
Loss = 5.1827e-01, PNorm = 41.7410, GNorm = 1.0453, lr_0 = 9.6559e-04
Loss = 5.2492e-01, PNorm = 41.7549, GNorm = 0.8941, lr_0 = 9.6493e-04
Loss = 4.9720e-01, PNorm = 41.7681, GNorm = 2.2896, lr_0 = 9.6427e-04
Loss = 5.1336e-01, PNorm = 41.7809, GNorm = 2.2647, lr_0 = 9.6360e-04
Loss = 5.1967e-01, PNorm = 41.7930, GNorm = 1.1095, lr_0 = 9.6294e-04
Loss = 5.5269e-01, PNorm = 41.8206, GNorm = 1.3126, lr_0 = 9.6228e-04
Loss = 5.4927e-01, PNorm = 41.8543, GNorm = 2.3644, lr_0 = 9.6163e-04
Loss = 5.5059e-01, PNorm = 41.8742, GNorm = 1.3659, lr_0 = 9.6097e-04
Loss = 5.5478e-01, PNorm = 41.8908, GNorm = 1.3425, lr_0 = 9.6031e-04
Loss = 5.6722e-01, PNorm = 41.9059, GNorm = 1.7419, lr_0 = 9.5965e-04
Loss = 5.2546e-01, PNorm = 41.9252, GNorm = 3.0758, lr_0 = 9.5899e-04
Loss = 6.6264e-01, PNorm = 41.9384, GNorm = 3.4269, lr_0 = 9.5834e-04
Loss = 5.8152e-01, PNorm = 41.9586, GNorm = 3.5903, lr_0 = 9.5768e-04
Loss = 5.3533e-01, PNorm = 41.9842, GNorm = 1.9223, lr_0 = 9.5702e-04
Loss = 5.6870e-01, PNorm = 42.0049, GNorm = 1.7592, lr_0 = 9.5637e-04
Loss = 4.7476e-01, PNorm = 42.0272, GNorm = 1.1466, lr_0 = 9.5571e-04
Loss = 5.5242e-01, PNorm = 42.0403, GNorm = 2.3013, lr_0 = 9.5506e-04
Loss = 5.0944e-01, PNorm = 42.0560, GNorm = 1.4721, lr_0 = 9.5440e-04
Loss = 4.5233e-01, PNorm = 42.0807, GNorm = 1.6181, lr_0 = 9.5375e-04
Loss = 4.5168e-01, PNorm = 42.1001, GNorm = 2.0703, lr_0 = 9.5310e-04
Loss = 5.8214e-01, PNorm = 42.1120, GNorm = 1.3129, lr_0 = 9.5244e-04
Loss = 5.1889e-01, PNorm = 42.1301, GNorm = 1.3980, lr_0 = 9.5179e-04
Loss = 5.4315e-01, PNorm = 42.1490, GNorm = 2.3652, lr_0 = 9.5114e-04
Loss = 5.5985e-01, PNorm = 42.1702, GNorm = 1.7272, lr_0 = 9.5049e-04
Loss = 5.8235e-01, PNorm = 42.1890, GNorm = 2.4839, lr_0 = 9.4984e-04
Loss = 4.7406e-01, PNorm = 42.2130, GNorm = 0.7903, lr_0 = 9.4919e-04
Loss = 4.9731e-01, PNorm = 42.2381, GNorm = 1.1264, lr_0 = 9.4854e-04
Loss = 5.3562e-01, PNorm = 42.2619, GNorm = 0.8336, lr_0 = 9.4789e-04
Loss = 5.1225e-01, PNorm = 42.2801, GNorm = 1.5213, lr_0 = 9.4724e-04
Loss = 4.4911e-01, PNorm = 42.2939, GNorm = 1.2317, lr_0 = 9.4659e-04
Loss = 5.6017e-01, PNorm = 42.3083, GNorm = 1.5622, lr_0 = 9.4594e-04
Loss = 5.6439e-01, PNorm = 42.3307, GNorm = 2.6964, lr_0 = 9.4529e-04
Loss = 5.9391e-01, PNorm = 42.3515, GNorm = 1.6416, lr_0 = 9.4464e-04
Loss = 4.6297e-01, PNorm = 42.3646, GNorm = 1.1575, lr_0 = 9.4400e-04
Loss = 5.4320e-01, PNorm = 42.3813, GNorm = 2.6468, lr_0 = 9.4335e-04
Loss = 5.8636e-01, PNorm = 42.3975, GNorm = 3.4252, lr_0 = 9.4270e-04
Loss = 5.3425e-01, PNorm = 42.4169, GNorm = 1.2313, lr_0 = 9.4206e-04
Loss = 5.2606e-01, PNorm = 42.4479, GNorm = 2.0452, lr_0 = 9.4141e-04
Loss = 5.5823e-01, PNorm = 42.4594, GNorm = 3.1627, lr_0 = 9.4077e-04
Loss = 5.5285e-01, PNorm = 42.4807, GNorm = 2.1570, lr_0 = 9.4012e-04
Loss = 5.5109e-01, PNorm = 42.5036, GNorm = 1.2583, lr_0 = 9.3948e-04
Loss = 5.4579e-01, PNorm = 42.5391, GNorm = 1.6861, lr_0 = 9.3884e-04
Loss = 5.3823e-01, PNorm = 42.5713, GNorm = 2.1190, lr_0 = 9.3819e-04
Loss = 5.4053e-01, PNorm = 42.5934, GNorm = 1.1135, lr_0 = 9.3755e-04
Loss = 5.2134e-01, PNorm = 42.6159, GNorm = 1.4749, lr_0 = 9.3691e-04
Loss = 4.8556e-01, PNorm = 42.6301, GNorm = 4.0241, lr_0 = 9.3627e-04
Loss = 5.5088e-01, PNorm = 42.6510, GNorm = 2.2259, lr_0 = 9.3562e-04
Loss = 5.4812e-01, PNorm = 42.6760, GNorm = 1.1409, lr_0 = 9.3498e-04
Loss = 5.4775e-01, PNorm = 42.6878, GNorm = 1.6168, lr_0 = 9.3434e-04
Loss = 5.4477e-01, PNorm = 42.7092, GNorm = 2.9061, lr_0 = 9.3370e-04
Loss = 5.9842e-01, PNorm = 42.7375, GNorm = 2.1837, lr_0 = 9.3306e-04
Loss = 5.8569e-01, PNorm = 42.7596, GNorm = 2.4700, lr_0 = 9.3242e-04
Loss = 5.2441e-01, PNorm = 42.7817, GNorm = 0.8615, lr_0 = 9.3178e-04
Loss = 5.2061e-01, PNorm = 42.7972, GNorm = 1.8175, lr_0 = 9.3115e-04
Loss = 5.6305e-01, PNorm = 42.8257, GNorm = 2.1394, lr_0 = 9.3051e-04
Loss = 5.4448e-01, PNorm = 42.8432, GNorm = 1.3146, lr_0 = 9.2987e-04
Loss = 5.3678e-01, PNorm = 42.8634, GNorm = 1.3168, lr_0 = 9.2923e-04
Loss = 4.6231e-01, PNorm = 42.8928, GNorm = 0.9891, lr_0 = 9.2860e-04
Loss = 5.0604e-01, PNorm = 42.9186, GNorm = 1.7882, lr_0 = 9.2796e-04
Loss = 5.1868e-01, PNorm = 42.9322, GNorm = 1.3672, lr_0 = 9.2733e-04
Loss = 5.1544e-01, PNorm = 42.9496, GNorm = 1.8880, lr_0 = 9.2669e-04
Loss = 4.3261e-01, PNorm = 42.9571, GNorm = 1.9756, lr_0 = 9.2606e-04
Loss = 4.6563e-01, PNorm = 42.9788, GNorm = 1.1482, lr_0 = 9.2542e-04
Loss = 5.0085e-01, PNorm = 42.9999, GNorm = 1.5289, lr_0 = 9.2479e-04
Loss = 5.0247e-01, PNorm = 43.0194, GNorm = 1.6223, lr_0 = 9.2415e-04
Loss = 4.5773e-01, PNorm = 43.0425, GNorm = 1.3623, lr_0 = 9.2352e-04
Loss = 5.2476e-01, PNorm = 43.0659, GNorm = 3.3044, lr_0 = 9.2289e-04
Loss = 6.4152e-01, PNorm = 43.0877, GNorm = 4.6510, lr_0 = 9.2226e-04
Loss = 5.6183e-01, PNorm = 43.1252, GNorm = 1.2282, lr_0 = 9.2162e-04
Loss = 6.5760e-01, PNorm = 43.1617, GNorm = 2.0179, lr_0 = 9.2099e-04
Validation mae = 0.126003
Epoch 3
Loss = 4.7994e-01, PNorm = 43.1750, GNorm = 2.5020, lr_0 = 9.2036e-04
Loss = 5.4147e-01, PNorm = 43.1882, GNorm = 1.4299, lr_0 = 9.1973e-04
Loss = 5.1775e-01, PNorm = 43.2074, GNorm = 2.4465, lr_0 = 9.1910e-04
Loss = 5.0668e-01, PNorm = 43.2326, GNorm = 1.7639, lr_0 = 9.1847e-04
Loss = 5.6464e-01, PNorm = 43.2611, GNorm = 1.2205, lr_0 = 9.1784e-04
Loss = 5.2088e-01, PNorm = 43.2850, GNorm = 1.4869, lr_0 = 9.1721e-04
Loss = 5.1456e-01, PNorm = 43.2972, GNorm = 1.4917, lr_0 = 9.1658e-04
Loss = 6.0233e-01, PNorm = 43.3084, GNorm = 2.7278, lr_0 = 9.1596e-04
Loss = 5.0265e-01, PNorm = 43.3290, GNorm = 1.2143, lr_0 = 9.1533e-04
Loss = 4.1965e-01, PNorm = 43.3545, GNorm = 1.6742, lr_0 = 9.1470e-04
Loss = 5.0026e-01, PNorm = 43.3743, GNorm = 1.3391, lr_0 = 9.1408e-04
Loss = 5.0084e-01, PNorm = 43.3893, GNorm = 1.2895, lr_0 = 9.1345e-04
Loss = 5.1991e-01, PNorm = 43.4073, GNorm = 1.3051, lr_0 = 9.1282e-04
Loss = 5.6681e-01, PNorm = 43.4286, GNorm = 1.6847, lr_0 = 9.1220e-04
Loss = 5.0606e-01, PNorm = 43.4558, GNorm = 2.6557, lr_0 = 9.1157e-04
Loss = 4.7795e-01, PNorm = 43.4862, GNorm = 1.5075, lr_0 = 9.1095e-04
Loss = 5.4391e-01, PNorm = 43.5168, GNorm = 0.9342, lr_0 = 9.1032e-04
Loss = 5.0667e-01, PNorm = 43.5538, GNorm = 1.4754, lr_0 = 9.0970e-04
Loss = 5.7346e-01, PNorm = 43.5709, GNorm = 1.4418, lr_0 = 9.0908e-04
Loss = 4.8723e-01, PNorm = 43.5889, GNorm = 2.0465, lr_0 = 9.0846e-04
Loss = 5.1225e-01, PNorm = 43.6046, GNorm = 1.8069, lr_0 = 9.0783e-04
Loss = 5.0254e-01, PNorm = 43.6285, GNorm = 1.6383, lr_0 = 9.0721e-04
Loss = 5.5784e-01, PNorm = 43.6390, GNorm = 1.2157, lr_0 = 9.0659e-04
Loss = 5.8597e-01, PNorm = 43.6635, GNorm = 1.5712, lr_0 = 9.0597e-04
Loss = 5.5520e-01, PNorm = 43.6805, GNorm = 2.6701, lr_0 = 9.0535e-04
Loss = 5.1784e-01, PNorm = 43.7043, GNorm = 1.2566, lr_0 = 9.0473e-04
Loss = 4.7087e-01, PNorm = 43.7231, GNorm = 3.6382, lr_0 = 9.0411e-04
Loss = 5.2310e-01, PNorm = 43.7434, GNorm = 2.3431, lr_0 = 9.0349e-04
Loss = 4.8800e-01, PNorm = 43.7611, GNorm = 3.4074, lr_0 = 9.0287e-04
Loss = 5.5401e-01, PNorm = 43.7884, GNorm = 1.3965, lr_0 = 9.0225e-04
Loss = 5.1663e-01, PNorm = 43.8075, GNorm = 1.4860, lr_0 = 9.0163e-04
Loss = 5.5916e-01, PNorm = 43.8335, GNorm = 0.9421, lr_0 = 9.0102e-04
Loss = 4.6129e-01, PNorm = 43.8510, GNorm = 1.0306, lr_0 = 9.0040e-04
Loss = 4.6799e-01, PNorm = 43.8700, GNorm = 1.6146, lr_0 = 8.9978e-04
Loss = 4.6942e-01, PNorm = 43.8913, GNorm = 0.8968, lr_0 = 8.9916e-04
Loss = 4.6614e-01, PNorm = 43.9116, GNorm = 1.1585, lr_0 = 8.9855e-04
Loss = 5.1617e-01, PNorm = 43.9318, GNorm = 1.4855, lr_0 = 8.9793e-04
Loss = 4.5707e-01, PNorm = 43.9485, GNorm = 1.9851, lr_0 = 8.9732e-04
Loss = 4.6868e-01, PNorm = 43.9602, GNorm = 1.2138, lr_0 = 8.9670e-04
Loss = 4.5459e-01, PNorm = 43.9842, GNorm = 1.6500, lr_0 = 8.9609e-04
Loss = 4.6235e-01, PNorm = 44.0015, GNorm = 1.7641, lr_0 = 8.9548e-04
Loss = 5.0815e-01, PNorm = 44.0120, GNorm = 1.7752, lr_0 = 8.9486e-04
Loss = 4.7086e-01, PNorm = 44.0238, GNorm = 3.4734, lr_0 = 8.9425e-04
Loss = 5.6187e-01, PNorm = 44.0467, GNorm = 1.2840, lr_0 = 8.9364e-04
Loss = 4.7402e-01, PNorm = 44.0669, GNorm = 1.5626, lr_0 = 8.9302e-04
Loss = 5.2051e-01, PNorm = 44.0858, GNorm = 2.1419, lr_0 = 8.9241e-04
Loss = 5.2831e-01, PNorm = 44.1040, GNorm = 2.6638, lr_0 = 8.9180e-04
Loss = 6.4882e-01, PNorm = 44.1276, GNorm = 1.8801, lr_0 = 8.9119e-04
Loss = 4.7945e-01, PNorm = 44.1378, GNorm = 1.0047, lr_0 = 8.9058e-04
Loss = 5.1212e-01, PNorm = 44.1577, GNorm = 1.3267, lr_0 = 8.8997e-04
Loss = 4.4222e-01, PNorm = 44.1697, GNorm = 2.1120, lr_0 = 8.8936e-04
Loss = 4.4108e-01, PNorm = 44.1998, GNorm = 2.0845, lr_0 = 8.8875e-04
Loss = 5.3404e-01, PNorm = 44.2182, GNorm = 1.7187, lr_0 = 8.8814e-04
Loss = 5.1402e-01, PNorm = 44.2409, GNorm = 1.5633, lr_0 = 8.8753e-04
Loss = 5.2265e-01, PNorm = 44.2548, GNorm = 1.6337, lr_0 = 8.8693e-04
Loss = 4.8154e-01, PNorm = 44.2613, GNorm = 1.1040, lr_0 = 8.8632e-04
Loss = 5.5111e-01, PNorm = 44.2834, GNorm = 1.8725, lr_0 = 8.8571e-04
Loss = 5.1267e-01, PNorm = 44.3010, GNorm = 2.7370, lr_0 = 8.8510e-04
Loss = 5.0629e-01, PNorm = 44.3238, GNorm = 1.3429, lr_0 = 8.8450e-04
Loss = 4.8198e-01, PNorm = 44.3529, GNorm = 2.2945, lr_0 = 8.8389e-04
Loss = 5.2801e-01, PNorm = 44.3731, GNorm = 1.8991, lr_0 = 8.8329e-04
Loss = 4.3200e-01, PNorm = 44.3919, GNorm = 1.3305, lr_0 = 8.8268e-04
Loss = 4.8735e-01, PNorm = 44.4208, GNorm = 2.3807, lr_0 = 8.8208e-04
Loss = 4.9446e-01, PNorm = 44.4482, GNorm = 0.9555, lr_0 = 8.8147e-04
Loss = 5.2598e-01, PNorm = 44.4682, GNorm = 1.0424, lr_0 = 8.8087e-04
Loss = 5.7240e-01, PNorm = 44.5021, GNorm = 1.4789, lr_0 = 8.8026e-04
Loss = 4.7328e-01, PNorm = 44.5311, GNorm = 1.9893, lr_0 = 8.7966e-04
Loss = 5.0272e-01, PNorm = 44.5538, GNorm = 2.2232, lr_0 = 8.7906e-04
Loss = 5.1475e-01, PNorm = 44.5754, GNorm = 1.1235, lr_0 = 8.7846e-04
Loss = 4.8911e-01, PNorm = 44.5939, GNorm = 1.8284, lr_0 = 8.7785e-04
Loss = 4.9180e-01, PNorm = 44.6102, GNorm = 2.8666, lr_0 = 8.7725e-04
Loss = 5.0047e-01, PNorm = 44.6246, GNorm = 1.0777, lr_0 = 8.7665e-04
Loss = 5.6371e-01, PNorm = 44.6435, GNorm = 2.4267, lr_0 = 8.7605e-04
Loss = 4.7762e-01, PNorm = 44.6609, GNorm = 1.1838, lr_0 = 8.7545e-04
Loss = 5.4980e-01, PNorm = 44.6937, GNorm = 1.4064, lr_0 = 8.7485e-04
Loss = 4.2896e-01, PNorm = 44.7047, GNorm = 2.0191, lr_0 = 8.7425e-04
Loss = 4.7551e-01, PNorm = 44.7207, GNorm = 3.2681, lr_0 = 8.7365e-04
Loss = 5.6104e-01, PNorm = 44.7390, GNorm = 1.5241, lr_0 = 8.7306e-04
Loss = 5.5462e-01, PNorm = 44.7619, GNorm = 1.4491, lr_0 = 8.7246e-04
Loss = 5.1153e-01, PNorm = 44.7975, GNorm = 1.4479, lr_0 = 8.7186e-04
Loss = 5.3463e-01, PNorm = 44.8175, GNorm = 1.0402, lr_0 = 8.7126e-04
Loss = 5.6029e-01, PNorm = 44.8366, GNorm = 1.5808, lr_0 = 8.7067e-04
Loss = 5.3323e-01, PNorm = 44.8610, GNorm = 1.2465, lr_0 = 8.7007e-04
Loss = 5.8223e-01, PNorm = 44.8830, GNorm = 1.0713, lr_0 = 8.6947e-04
Loss = 5.0539e-01, PNorm = 44.9042, GNorm = 1.3539, lr_0 = 8.6888e-04
Loss = 5.1543e-01, PNorm = 44.9203, GNorm = 1.8556, lr_0 = 8.6828e-04
Loss = 5.1345e-01, PNorm = 44.9414, GNorm = 1.4047, lr_0 = 8.6769e-04
Loss = 4.7216e-01, PNorm = 44.9605, GNorm = 1.2494, lr_0 = 8.6709e-04
Loss = 4.4355e-01, PNorm = 44.9805, GNorm = 2.8686, lr_0 = 8.6650e-04
Loss = 4.9155e-01, PNorm = 44.9940, GNorm = 1.4426, lr_0 = 8.6590e-04
Loss = 4.8101e-01, PNorm = 45.0184, GNorm = 2.0597, lr_0 = 8.6531e-04
Loss = 4.8438e-01, PNorm = 45.0379, GNorm = 1.3438, lr_0 = 8.6472e-04
Loss = 4.1933e-01, PNorm = 45.0617, GNorm = 1.6066, lr_0 = 8.6413e-04
Loss = 4.7232e-01, PNorm = 45.0795, GNorm = 1.3413, lr_0 = 8.6353e-04
Loss = 4.4969e-01, PNorm = 45.0966, GNorm = 0.9174, lr_0 = 8.6294e-04
Loss = 4.6200e-01, PNorm = 45.1148, GNorm = 1.9126, lr_0 = 8.6235e-04
Loss = 5.1339e-01, PNorm = 45.1277, GNorm = 0.8860, lr_0 = 8.6176e-04
Loss = 5.1928e-01, PNorm = 45.1466, GNorm = 1.7665, lr_0 = 8.6117e-04
Loss = 4.8100e-01, PNorm = 45.1641, GNorm = 1.2606, lr_0 = 8.6058e-04
Loss = 5.5032e-01, PNorm = 45.1818, GNorm = 1.0768, lr_0 = 8.5999e-04
Loss = 4.5951e-01, PNorm = 45.2002, GNorm = 1.7556, lr_0 = 8.5940e-04
Loss = 4.7334e-01, PNorm = 45.2182, GNorm = 0.9039, lr_0 = 8.5881e-04
Loss = 4.3279e-01, PNorm = 45.2421, GNorm = 1.4720, lr_0 = 8.5823e-04
Loss = 4.6544e-01, PNorm = 45.2593, GNorm = 2.0381, lr_0 = 8.5764e-04
Loss = 5.1158e-01, PNorm = 45.2773, GNorm = 2.0934, lr_0 = 8.5705e-04
Loss = 5.4064e-01, PNorm = 45.2957, GNorm = 1.3903, lr_0 = 8.5646e-04
Loss = 4.8975e-01, PNorm = 45.3222, GNorm = 1.5082, lr_0 = 8.5588e-04
Loss = 5.3129e-01, PNorm = 45.3477, GNorm = 1.3244, lr_0 = 8.5529e-04
Loss = 5.4919e-01, PNorm = 45.3615, GNorm = 1.5046, lr_0 = 8.5470e-04
Loss = 4.8679e-01, PNorm = 45.3799, GNorm = 1.3151, lr_0 = 8.5412e-04
Loss = 5.6390e-01, PNorm = 45.4036, GNorm = 1.1128, lr_0 = 8.5353e-04
Loss = 4.8303e-01, PNorm = 45.4237, GNorm = 1.2138, lr_0 = 8.5295e-04
Loss = 4.8684e-01, PNorm = 45.4409, GNorm = 1.7077, lr_0 = 8.5236e-04
Loss = 4.6734e-01, PNorm = 45.4573, GNorm = 1.5615, lr_0 = 8.5178e-04
Loss = 4.7356e-01, PNorm = 45.4684, GNorm = 0.9415, lr_0 = 8.5120e-04
Loss = 5.6020e-01, PNorm = 45.4956, GNorm = 1.6437, lr_0 = 8.5061e-04
Loss = 5.0520e-01, PNorm = 45.5200, GNorm = 1.4348, lr_0 = 8.5003e-04
Loss = 5.4613e-01, PNorm = 45.5467, GNorm = 1.2135, lr_0 = 8.4945e-04
Loss = 5.2186e-01, PNorm = 45.5668, GNorm = 1.2934, lr_0 = 8.4887e-04
Loss = 4.8337e-01, PNorm = 45.5834, GNorm = 1.7602, lr_0 = 8.4828e-04
Validation mae = 0.124821
Epoch 4
Loss = 5.3625e-01, PNorm = 45.6009, GNorm = 2.1315, lr_0 = 8.4770e-04
Loss = 4.6975e-01, PNorm = 45.6158, GNorm = 1.8534, lr_0 = 8.4712e-04
Loss = 5.3612e-01, PNorm = 45.6326, GNorm = 1.3368, lr_0 = 8.4654e-04
Loss = 5.1293e-01, PNorm = 45.6434, GNorm = 1.5456, lr_0 = 8.4596e-04
Loss = 6.1868e-01, PNorm = 45.6715, GNorm = 3.7846, lr_0 = 8.4538e-04
Loss = 4.9195e-01, PNorm = 45.6939, GNorm = 1.0240, lr_0 = 8.4480e-04
Loss = 4.9256e-01, PNorm = 45.7196, GNorm = 1.1409, lr_0 = 8.4423e-04
Loss = 5.4643e-01, PNorm = 45.7290, GNorm = 2.5031, lr_0 = 8.4365e-04
Loss = 4.3412e-01, PNorm = 45.7458, GNorm = 2.1070, lr_0 = 8.4307e-04
Loss = 4.6084e-01, PNorm = 45.7692, GNorm = 1.1179, lr_0 = 8.4249e-04
Loss = 4.6374e-01, PNorm = 45.7883, GNorm = 1.8371, lr_0 = 8.4191e-04
Loss = 5.2160e-01, PNorm = 45.8044, GNorm = 2.0661, lr_0 = 8.4134e-04
Loss = 4.5250e-01, PNorm = 45.8238, GNorm = 1.7770, lr_0 = 8.4076e-04
Loss = 3.8724e-01, PNorm = 45.8485, GNorm = 1.4564, lr_0 = 8.4019e-04
Loss = 5.9946e-01, PNorm = 45.8630, GNorm = 1.3006, lr_0 = 8.3961e-04
Loss = 5.0669e-01, PNorm = 45.8818, GNorm = 1.1741, lr_0 = 8.3903e-04
Loss = 4.6572e-01, PNorm = 45.8954, GNorm = 0.9783, lr_0 = 8.3846e-04
Loss = 4.9076e-01, PNorm = 45.9194, GNorm = 1.2156, lr_0 = 8.3789e-04
Loss = 4.8236e-01, PNorm = 45.9392, GNorm = 1.3628, lr_0 = 8.3731e-04
Loss = 4.8929e-01, PNorm = 45.9537, GNorm = 1.6176, lr_0 = 8.3674e-04
Loss = 4.3398e-01, PNorm = 45.9740, GNorm = 1.9477, lr_0 = 8.3616e-04
Loss = 4.3389e-01, PNorm = 45.9854, GNorm = 1.0111, lr_0 = 8.3559e-04
Loss = 4.7016e-01, PNorm = 45.9974, GNorm = 1.9012, lr_0 = 8.3502e-04
Loss = 4.8689e-01, PNorm = 46.0088, GNorm = 3.3501, lr_0 = 8.3445e-04
Loss = 4.9748e-01, PNorm = 46.0208, GNorm = 1.5383, lr_0 = 8.3388e-04
Loss = 4.8654e-01, PNorm = 46.0501, GNorm = 2.7009, lr_0 = 8.3330e-04
Loss = 5.6911e-01, PNorm = 46.0748, GNorm = 1.6681, lr_0 = 8.3273e-04
Loss = 5.7814e-01, PNorm = 46.1155, GNorm = 1.0739, lr_0 = 8.3216e-04
Loss = 5.5332e-01, PNorm = 46.1372, GNorm = 1.3811, lr_0 = 8.3159e-04
Loss = 4.9186e-01, PNorm = 46.1456, GNorm = 1.4316, lr_0 = 8.3102e-04
Loss = 5.4969e-01, PNorm = 46.1672, GNorm = 1.5038, lr_0 = 8.3045e-04
Loss = 5.1713e-01, PNorm = 46.1861, GNorm = 1.1822, lr_0 = 8.2988e-04
Loss = 5.1130e-01, PNorm = 46.1924, GNorm = 1.5274, lr_0 = 8.2932e-04
Loss = 4.9002e-01, PNorm = 46.2086, GNorm = 2.8012, lr_0 = 8.2875e-04
Loss = 5.3267e-01, PNorm = 46.2233, GNorm = 1.8052, lr_0 = 8.2818e-04
Loss = 5.1936e-01, PNorm = 46.2447, GNorm = 1.4751, lr_0 = 8.2761e-04
Loss = 4.4008e-01, PNorm = 46.2665, GNorm = 1.6234, lr_0 = 8.2705e-04
Loss = 4.6676e-01, PNorm = 46.2720, GNorm = 1.3497, lr_0 = 8.2648e-04
Loss = 5.5155e-01, PNorm = 46.2879, GNorm = 1.1485, lr_0 = 8.2591e-04
Loss = 5.4464e-01, PNorm = 46.3005, GNorm = 0.7675, lr_0 = 8.2535e-04
Loss = 5.2652e-01, PNorm = 46.3157, GNorm = 0.9785, lr_0 = 8.2478e-04
Loss = 5.4080e-01, PNorm = 46.3351, GNorm = 2.6279, lr_0 = 8.2422e-04
Loss = 5.3007e-01, PNorm = 46.3604, GNorm = 1.3416, lr_0 = 8.2365e-04
Loss = 4.9812e-01, PNorm = 46.3850, GNorm = 2.3859, lr_0 = 8.2309e-04
Loss = 4.3883e-01, PNorm = 46.4122, GNorm = 1.3591, lr_0 = 8.2252e-04
Loss = 4.7771e-01, PNorm = 46.4249, GNorm = 1.2964, lr_0 = 8.2196e-04
Loss = 4.6070e-01, PNorm = 46.4378, GNorm = 1.0094, lr_0 = 8.2140e-04
Loss = 4.8870e-01, PNorm = 46.4561, GNorm = 2.1147, lr_0 = 8.2084e-04
Loss = 4.3193e-01, PNorm = 46.4749, GNorm = 1.1323, lr_0 = 8.2027e-04
Loss = 4.2262e-01, PNorm = 46.4845, GNorm = 1.2269, lr_0 = 8.1971e-04
Loss = 5.0253e-01, PNorm = 46.5021, GNorm = 1.3753, lr_0 = 8.1915e-04
Loss = 4.5867e-01, PNorm = 46.5170, GNorm = 1.6130, lr_0 = 8.1859e-04
Loss = 4.5995e-01, PNorm = 46.5342, GNorm = 1.1844, lr_0 = 8.1803e-04
Loss = 4.7871e-01, PNorm = 46.5456, GNorm = 1.2362, lr_0 = 8.1747e-04
Loss = 4.8352e-01, PNorm = 46.5659, GNorm = 1.1834, lr_0 = 8.1691e-04
Loss = 4.9174e-01, PNorm = 46.5843, GNorm = 2.2299, lr_0 = 8.1635e-04
Loss = 4.8220e-01, PNorm = 46.5952, GNorm = 1.0634, lr_0 = 8.1579e-04
Loss = 4.7235e-01, PNorm = 46.6089, GNorm = 1.9919, lr_0 = 8.1523e-04
Loss = 5.2392e-01, PNorm = 46.6168, GNorm = 1.7827, lr_0 = 8.1467e-04
Loss = 4.1160e-01, PNorm = 46.6357, GNorm = 1.5952, lr_0 = 8.1411e-04
Loss = 5.2059e-01, PNorm = 46.6550, GNorm = 1.5130, lr_0 = 8.1355e-04
Loss = 4.9312e-01, PNorm = 46.6742, GNorm = 1.4260, lr_0 = 8.1300e-04
Loss = 4.3252e-01, PNorm = 46.6911, GNorm = 2.0196, lr_0 = 8.1244e-04
Loss = 5.4437e-01, PNorm = 46.7112, GNorm = 2.1190, lr_0 = 8.1188e-04
Loss = 5.1735e-01, PNorm = 46.7404, GNorm = 1.8022, lr_0 = 8.1133e-04
Loss = 5.1497e-01, PNorm = 46.7704, GNorm = 1.2128, lr_0 = 8.1077e-04
Loss = 4.4416e-01, PNorm = 46.7915, GNorm = 0.9828, lr_0 = 8.1022e-04
Loss = 4.0089e-01, PNorm = 46.8084, GNorm = 0.9960, lr_0 = 8.0966e-04
Loss = 4.5854e-01, PNorm = 46.8235, GNorm = 1.0372, lr_0 = 8.0911e-04
Loss = 4.2984e-01, PNorm = 46.8383, GNorm = 1.5269, lr_0 = 8.0855e-04
Loss = 4.8957e-01, PNorm = 46.8505, GNorm = 1.4486, lr_0 = 8.0800e-04
Loss = 4.8701e-01, PNorm = 46.8691, GNorm = 1.2330, lr_0 = 8.0745e-04
Loss = 4.2461e-01, PNorm = 46.8891, GNorm = 1.0459, lr_0 = 8.0689e-04
Loss = 4.5574e-01, PNorm = 46.9056, GNorm = 1.0502, lr_0 = 8.0634e-04
Loss = 5.0279e-01, PNorm = 46.9195, GNorm = 1.5598, lr_0 = 8.0579e-04
Loss = 5.3191e-01, PNorm = 46.9401, GNorm = 2.4809, lr_0 = 8.0523e-04
Loss = 4.7815e-01, PNorm = 46.9647, GNorm = 1.7049, lr_0 = 8.0468e-04
Loss = 4.6794e-01, PNorm = 46.9815, GNorm = 1.0877, lr_0 = 8.0413e-04
Loss = 4.2088e-01, PNorm = 47.0028, GNorm = 1.8601, lr_0 = 8.0358e-04
Loss = 5.8053e-01, PNorm = 47.0183, GNorm = 1.2789, lr_0 = 8.0303e-04
Loss = 4.6869e-01, PNorm = 47.0346, GNorm = 1.0631, lr_0 = 8.0248e-04
Loss = 5.0800e-01, PNorm = 47.0624, GNorm = 2.7273, lr_0 = 8.0193e-04
Loss = 5.5057e-01, PNorm = 47.0840, GNorm = 1.0885, lr_0 = 8.0138e-04
Loss = 4.4063e-01, PNorm = 47.1034, GNorm = 1.5862, lr_0 = 8.0083e-04
Loss = 5.2443e-01, PNorm = 47.1239, GNorm = 1.8350, lr_0 = 8.0028e-04
Loss = 4.9880e-01, PNorm = 47.1384, GNorm = 1.0203, lr_0 = 7.9974e-04
Loss = 4.4336e-01, PNorm = 47.1532, GNorm = 1.0307, lr_0 = 7.9919e-04
Loss = 4.4805e-01, PNorm = 47.1647, GNorm = 1.7342, lr_0 = 7.9864e-04
Loss = 4.9816e-01, PNorm = 47.1819, GNorm = 1.4316, lr_0 = 7.9809e-04
Loss = 4.6415e-01, PNorm = 47.2001, GNorm = 0.9198, lr_0 = 7.9755e-04
Loss = 4.3916e-01, PNorm = 47.2114, GNorm = 1.1473, lr_0 = 7.9700e-04
Loss = 5.5655e-01, PNorm = 47.2227, GNorm = 2.1271, lr_0 = 7.9645e-04
Loss = 5.0761e-01, PNorm = 47.2409, GNorm = 1.7061, lr_0 = 7.9591e-04
Loss = 4.9397e-01, PNorm = 47.2656, GNorm = 3.0639, lr_0 = 7.9536e-04
Loss = 4.7374e-01, PNorm = 47.2880, GNorm = 0.9699, lr_0 = 7.9482e-04
Loss = 4.9575e-01, PNorm = 47.3058, GNorm = 2.2527, lr_0 = 7.9427e-04
Loss = 5.7159e-01, PNorm = 47.3269, GNorm = 0.9591, lr_0 = 7.9373e-04
Loss = 4.5453e-01, PNorm = 47.3477, GNorm = 1.2614, lr_0 = 7.9319e-04
Loss = 5.2330e-01, PNorm = 47.3676, GNorm = 1.5565, lr_0 = 7.9264e-04
Loss = 4.9031e-01, PNorm = 47.3882, GNorm = 1.7896, lr_0 = 7.9210e-04
Loss = 4.4845e-01, PNorm = 47.4134, GNorm = 1.2222, lr_0 = 7.9156e-04
Loss = 4.9234e-01, PNorm = 47.4248, GNorm = 1.3179, lr_0 = 7.9101e-04
Loss = 4.3275e-01, PNorm = 47.4301, GNorm = 1.4004, lr_0 = 7.9047e-04
Loss = 4.4664e-01, PNorm = 47.4454, GNorm = 2.3210, lr_0 = 7.8993e-04
Loss = 4.8379e-01, PNorm = 47.4595, GNorm = 2.1982, lr_0 = 7.8939e-04
Loss = 4.4750e-01, PNorm = 47.4692, GNorm = 0.8954, lr_0 = 7.8885e-04
Loss = 5.2982e-01, PNorm = 47.4839, GNorm = 2.7306, lr_0 = 7.8831e-04
Loss = 5.3947e-01, PNorm = 47.5040, GNorm = 1.2381, lr_0 = 7.8777e-04
Loss = 4.8641e-01, PNorm = 47.5299, GNorm = 1.7017, lr_0 = 7.8723e-04
Loss = 4.8050e-01, PNorm = 47.5497, GNorm = 1.9674, lr_0 = 7.8669e-04
Loss = 4.8332e-01, PNorm = 47.5587, GNorm = 1.4922, lr_0 = 7.8615e-04
Loss = 4.3345e-01, PNorm = 47.5754, GNorm = 1.2649, lr_0 = 7.8561e-04
Loss = 4.8716e-01, PNorm = 47.5871, GNorm = 1.2629, lr_0 = 7.8507e-04
Loss = 4.3744e-01, PNorm = 47.5973, GNorm = 1.6625, lr_0 = 7.8454e-04
Loss = 4.9205e-01, PNorm = 47.6143, GNorm = 2.5117, lr_0 = 7.8400e-04
Loss = 4.5576e-01, PNorm = 47.6334, GNorm = 1.3162, lr_0 = 7.8346e-04
Loss = 4.5823e-01, PNorm = 47.6573, GNorm = 1.3307, lr_0 = 7.8293e-04
Loss = 4.6046e-01, PNorm = 47.6769, GNorm = 1.2296, lr_0 = 7.8239e-04
Loss = 4.8988e-01, PNorm = 47.6869, GNorm = 1.4612, lr_0 = 7.8185e-04
Loss = 5.1709e-01, PNorm = 47.7014, GNorm = 0.9257, lr_0 = 7.8132e-04
Validation mae = 0.122065
Epoch 5
Loss = 4.1871e-01, PNorm = 47.7257, GNorm = 1.3017, lr_0 = 7.8078e-04
Loss = 4.2034e-01, PNorm = 47.7479, GNorm = 1.9797, lr_0 = 7.8025e-04
Loss = 4.9654e-01, PNorm = 47.7709, GNorm = 2.1449, lr_0 = 7.7971e-04
Loss = 4.5479e-01, PNorm = 47.7978, GNorm = 0.9439, lr_0 = 7.7918e-04
Loss = 4.8507e-01, PNorm = 47.8092, GNorm = 1.9896, lr_0 = 7.7864e-04
Loss = 4.6243e-01, PNorm = 47.8149, GNorm = 1.0326, lr_0 = 7.7811e-04
Loss = 4.9688e-01, PNorm = 47.8344, GNorm = 1.5688, lr_0 = 7.7758e-04
Loss = 4.7297e-01, PNorm = 47.8532, GNorm = 2.6610, lr_0 = 7.7705e-04
Loss = 4.7224e-01, PNorm = 47.8708, GNorm = 1.5622, lr_0 = 7.7651e-04
Loss = 4.3264e-01, PNorm = 47.8931, GNorm = 1.8085, lr_0 = 7.7598e-04
Loss = 4.7661e-01, PNorm = 47.9124, GNorm = 1.2883, lr_0 = 7.7545e-04
Loss = 4.2075e-01, PNorm = 47.9368, GNorm = 1.0886, lr_0 = 7.7492e-04
Loss = 5.0831e-01, PNorm = 47.9588, GNorm = 1.4836, lr_0 = 7.7439e-04
Loss = 4.9023e-01, PNorm = 47.9802, GNorm = 1.9987, lr_0 = 7.7386e-04
Loss = 4.4902e-01, PNorm = 48.0027, GNorm = 1.5307, lr_0 = 7.7333e-04
Loss = 5.3007e-01, PNorm = 48.0226, GNorm = 1.0354, lr_0 = 7.7280e-04
Loss = 5.1048e-01, PNorm = 48.0376, GNorm = 1.3565, lr_0 = 7.7227e-04
Loss = 4.7784e-01, PNorm = 48.0574, GNorm = 1.3654, lr_0 = 7.7174e-04
Loss = 4.8201e-01, PNorm = 48.0725, GNorm = 1.5109, lr_0 = 7.7121e-04
Loss = 4.5011e-01, PNorm = 48.0885, GNorm = 0.9313, lr_0 = 7.7068e-04
Loss = 4.7374e-01, PNorm = 48.1075, GNorm = 1.6117, lr_0 = 7.7015e-04
Loss = 4.7620e-01, PNorm = 48.1279, GNorm = 2.3588, lr_0 = 7.6963e-04
Loss = 4.4905e-01, PNorm = 48.1410, GNorm = 2.2224, lr_0 = 7.6910e-04
Loss = 4.4402e-01, PNorm = 48.1542, GNorm = 1.5505, lr_0 = 7.6857e-04
Loss = 4.9874e-01, PNorm = 48.1735, GNorm = 2.1058, lr_0 = 7.6805e-04
Loss = 4.0280e-01, PNorm = 48.1854, GNorm = 1.4817, lr_0 = 7.6752e-04
Loss = 4.6657e-01, PNorm = 48.1988, GNorm = 1.0366, lr_0 = 7.6699e-04
Loss = 4.7953e-01, PNorm = 48.2144, GNorm = 1.6822, lr_0 = 7.6647e-04
Loss = 5.4124e-01, PNorm = 48.2394, GNorm = 1.1568, lr_0 = 7.6594e-04
Loss = 5.1589e-01, PNorm = 48.2591, GNorm = 1.2183, lr_0 = 7.6542e-04
Loss = 4.4005e-01, PNorm = 48.2765, GNorm = 1.3648, lr_0 = 7.6489e-04
Loss = 4.7048e-01, PNorm = 48.2917, GNorm = 1.2043, lr_0 = 7.6437e-04
Loss = 5.3958e-01, PNorm = 48.3151, GNorm = 3.1986, lr_0 = 7.6385e-04
Loss = 4.7720e-01, PNorm = 48.3427, GNorm = 1.8398, lr_0 = 7.6332e-04
Loss = 4.9526e-01, PNorm = 48.3796, GNorm = 1.0979, lr_0 = 7.6280e-04
Loss = 5.6252e-01, PNorm = 48.4092, GNorm = 1.9733, lr_0 = 7.6228e-04
Loss = 4.5683e-01, PNorm = 48.4462, GNorm = 1.5803, lr_0 = 7.6176e-04
Loss = 4.4372e-01, PNorm = 48.4639, GNorm = 1.3315, lr_0 = 7.6123e-04
Loss = 4.7470e-01, PNorm = 48.4823, GNorm = 1.1941, lr_0 = 7.6071e-04
Loss = 4.2125e-01, PNorm = 48.4894, GNorm = 1.1657, lr_0 = 7.6019e-04
Loss = 4.6147e-01, PNorm = 48.5029, GNorm = 1.5021, lr_0 = 7.5967e-04
Loss = 4.3449e-01, PNorm = 48.5190, GNorm = 1.4574, lr_0 = 7.5915e-04
Loss = 4.8020e-01, PNorm = 48.5340, GNorm = 0.9926, lr_0 = 7.5863e-04
Loss = 4.6529e-01, PNorm = 48.5514, GNorm = 2.7455, lr_0 = 7.5811e-04
Loss = 3.9451e-01, PNorm = 48.5621, GNorm = 1.2902, lr_0 = 7.5759e-04
Loss = 4.6753e-01, PNorm = 48.5706, GNorm = 1.1075, lr_0 = 7.5707e-04
Loss = 4.5614e-01, PNorm = 48.5869, GNorm = 0.8991, lr_0 = 7.5655e-04
Loss = 5.2592e-01, PNorm = 48.5983, GNorm = 2.0295, lr_0 = 7.5603e-04
Loss = 4.3923e-01, PNorm = 48.6123, GNorm = 1.6984, lr_0 = 7.5552e-04
Loss = 4.2136e-01, PNorm = 48.6256, GNorm = 2.1946, lr_0 = 7.5500e-04
Loss = 4.5006e-01, PNorm = 48.6401, GNorm = 1.2984, lr_0 = 7.5448e-04
Loss = 4.4249e-01, PNorm = 48.6571, GNorm = 1.8915, lr_0 = 7.5397e-04
Loss = 4.1893e-01, PNorm = 48.6740, GNorm = 1.9005, lr_0 = 7.5345e-04
Loss = 5.1507e-01, PNorm = 48.6912, GNorm = 1.5089, lr_0 = 7.5293e-04
Loss = 4.9919e-01, PNorm = 48.7090, GNorm = 1.4961, lr_0 = 7.5242e-04
Loss = 4.7486e-01, PNorm = 48.7209, GNorm = 1.5244, lr_0 = 7.5190e-04
Loss = 4.1462e-01, PNorm = 48.7429, GNorm = 0.9363, lr_0 = 7.5139e-04
Loss = 4.3620e-01, PNorm = 48.7626, GNorm = 1.7193, lr_0 = 7.5087e-04
Loss = 5.0416e-01, PNorm = 48.7740, GNorm = 1.1521, lr_0 = 7.5036e-04
Loss = 5.3153e-01, PNorm = 48.7822, GNorm = 1.9145, lr_0 = 7.4984e-04
Loss = 4.0805e-01, PNorm = 48.8087, GNorm = 1.3551, lr_0 = 7.4933e-04
Loss = 4.4948e-01, PNorm = 48.8280, GNorm = 1.4385, lr_0 = 7.4882e-04
Loss = 4.7918e-01, PNorm = 48.8434, GNorm = 1.0417, lr_0 = 7.4830e-04
Loss = 5.2615e-01, PNorm = 48.8617, GNorm = 1.2543, lr_0 = 7.4779e-04
Loss = 4.6830e-01, PNorm = 48.8857, GNorm = 1.6733, lr_0 = 7.4728e-04
Loss = 4.5962e-01, PNorm = 48.9013, GNorm = 1.1888, lr_0 = 7.4677e-04
Loss = 4.6555e-01, PNorm = 48.9153, GNorm = 1.1209, lr_0 = 7.4625e-04
Loss = 5.4440e-01, PNorm = 48.9359, GNorm = 1.6253, lr_0 = 7.4574e-04
Loss = 4.2907e-01, PNorm = 48.9462, GNorm = 1.8521, lr_0 = 7.4523e-04
Loss = 4.9788e-01, PNorm = 48.9588, GNorm = 1.4916, lr_0 = 7.4472e-04
Loss = 4.0732e-01, PNorm = 48.9722, GNorm = 1.2213, lr_0 = 7.4421e-04
Loss = 4.2615e-01, PNorm = 48.9809, GNorm = 1.0360, lr_0 = 7.4370e-04
Loss = 4.3792e-01, PNorm = 48.9932, GNorm = 0.9817, lr_0 = 7.4319e-04
Loss = 4.2798e-01, PNorm = 49.0077, GNorm = 1.0296, lr_0 = 7.4268e-04
Loss = 4.9881e-01, PNorm = 49.0254, GNorm = 1.4978, lr_0 = 7.4217e-04
Loss = 5.1309e-01, PNorm = 49.0430, GNorm = 1.2013, lr_0 = 7.4167e-04
Loss = 4.4475e-01, PNorm = 49.0599, GNorm = 1.0798, lr_0 = 7.4116e-04
Loss = 5.0574e-01, PNorm = 49.0745, GNorm = 3.7401, lr_0 = 7.4065e-04
Loss = 5.5102e-01, PNorm = 49.1016, GNorm = 0.9208, lr_0 = 7.4014e-04
Loss = 4.7132e-01, PNorm = 49.1193, GNorm = 1.1220, lr_0 = 7.3964e-04
Loss = 4.7840e-01, PNorm = 49.1369, GNorm = 1.5067, lr_0 = 7.3913e-04
Loss = 4.8525e-01, PNorm = 49.1435, GNorm = 0.9609, lr_0 = 7.3862e-04
Loss = 4.8110e-01, PNorm = 49.1622, GNorm = 2.0782, lr_0 = 7.3812e-04
Loss = 4.5604e-01, PNorm = 49.1811, GNorm = 0.9428, lr_0 = 7.3761e-04
Loss = 5.1795e-01, PNorm = 49.2020, GNorm = 1.9261, lr_0 = 7.3711e-04
Loss = 4.6548e-01, PNorm = 49.2199, GNorm = 1.8802, lr_0 = 7.3660e-04
Loss = 4.6106e-01, PNorm = 49.2372, GNorm = 2.2416, lr_0 = 7.3610e-04
Loss = 4.3945e-01, PNorm = 49.2473, GNorm = 1.1283, lr_0 = 7.3559e-04
Loss = 4.7085e-01, PNorm = 49.2613, GNorm = 1.3321, lr_0 = 7.3509e-04
Loss = 4.7763e-01, PNorm = 49.2783, GNorm = 1.8020, lr_0 = 7.3458e-04
Loss = 4.7486e-01, PNorm = 49.2944, GNorm = 1.2570, lr_0 = 7.3408e-04
Loss = 4.5797e-01, PNorm = 49.3070, GNorm = 1.6251, lr_0 = 7.3358e-04
Loss = 4.4855e-01, PNorm = 49.3197, GNorm = 1.9770, lr_0 = 7.3308e-04
Loss = 4.2453e-01, PNorm = 49.3341, GNorm = 1.2624, lr_0 = 7.3257e-04
Loss = 4.9694e-01, PNorm = 49.3531, GNorm = 1.6234, lr_0 = 7.3207e-04
Loss = 4.3469e-01, PNorm = 49.3698, GNorm = 2.2824, lr_0 = 7.3157e-04
Loss = 4.3357e-01, PNorm = 49.3915, GNorm = 1.3000, lr_0 = 7.3107e-04
Loss = 4.4558e-01, PNorm = 49.4050, GNorm = 1.2062, lr_0 = 7.3057e-04
Loss = 4.7915e-01, PNorm = 49.4144, GNorm = 0.8469, lr_0 = 7.3007e-04
Loss = 4.7044e-01, PNorm = 49.4301, GNorm = 0.9994, lr_0 = 7.2957e-04
Loss = 4.8100e-01, PNorm = 49.4579, GNorm = 1.2327, lr_0 = 7.2907e-04
Loss = 4.7638e-01, PNorm = 49.4703, GNorm = 1.3089, lr_0 = 7.2857e-04
Loss = 4.7094e-01, PNorm = 49.4849, GNorm = 1.4085, lr_0 = 7.2807e-04
Loss = 4.9511e-01, PNorm = 49.4978, GNorm = 1.4612, lr_0 = 7.2757e-04
Loss = 4.7378e-01, PNorm = 49.5136, GNorm = 1.3471, lr_0 = 7.2707e-04
Loss = 4.7963e-01, PNorm = 49.5252, GNorm = 1.7061, lr_0 = 7.2657e-04
Loss = 4.6356e-01, PNorm = 49.5411, GNorm = 0.9401, lr_0 = 7.2608e-04
Loss = 4.7357e-01, PNorm = 49.5617, GNorm = 2.0381, lr_0 = 7.2558e-04
Loss = 5.1376e-01, PNorm = 49.5830, GNorm = 2.2089, lr_0 = 7.2508e-04
Loss = 3.8613e-01, PNorm = 49.5957, GNorm = 1.2549, lr_0 = 7.2458e-04
Loss = 4.7277e-01, PNorm = 49.6088, GNorm = 1.4559, lr_0 = 7.2409e-04
Loss = 5.8877e-01, PNorm = 49.6222, GNorm = 1.3864, lr_0 = 7.2359e-04
Loss = 4.7849e-01, PNorm = 49.6366, GNorm = 1.0747, lr_0 = 7.2310e-04
Loss = 4.9240e-01, PNorm = 49.6523, GNorm = 2.4245, lr_0 = 7.2260e-04
Loss = 4.5464e-01, PNorm = 49.6713, GNorm = 1.7167, lr_0 = 7.2211e-04
Loss = 5.4333e-01, PNorm = 49.6858, GNorm = 0.9073, lr_0 = 7.2161e-04
Loss = 4.8007e-01, PNorm = 49.7015, GNorm = 1.0809, lr_0 = 7.2112e-04
Loss = 4.5213e-01, PNorm = 49.7185, GNorm = 1.8093, lr_0 = 7.2062e-04
Loss = 4.5128e-01, PNorm = 49.7357, GNorm = 1.1102, lr_0 = 7.2013e-04
Loss = 4.4504e-01, PNorm = 49.7492, GNorm = 1.4053, lr_0 = 7.1964e-04
Validation mae = 0.118703
Epoch 6
Loss = 4.4125e-01, PNorm = 49.7672, GNorm = 1.3040, lr_0 = 7.1914e-04
Loss = 4.8065e-01, PNorm = 49.7856, GNorm = 2.2106, lr_0 = 7.1865e-04
Loss = 4.3095e-01, PNorm = 49.8042, GNorm = 1.1349, lr_0 = 7.1816e-04
Loss = 4.7287e-01, PNorm = 49.8147, GNorm = 2.0328, lr_0 = 7.1767e-04
Loss = 5.0306e-01, PNorm = 49.8337, GNorm = 1.2302, lr_0 = 7.1717e-04
Loss = 4.2539e-01, PNorm = 49.8517, GNorm = 1.0250, lr_0 = 7.1668e-04
Loss = 4.1077e-01, PNorm = 49.8628, GNorm = 1.8253, lr_0 = 7.1619e-04
Loss = 4.7008e-01, PNorm = 49.8793, GNorm = 2.4255, lr_0 = 7.1570e-04
Loss = 5.4783e-01, PNorm = 49.8957, GNorm = 1.3596, lr_0 = 7.1521e-04
Loss = 3.8236e-01, PNorm = 49.9122, GNorm = 0.9390, lr_0 = 7.1472e-04
Loss = 4.2559e-01, PNorm = 49.9301, GNorm = 1.0613, lr_0 = 7.1423e-04
Loss = 4.2930e-01, PNorm = 49.9465, GNorm = 1.2250, lr_0 = 7.1374e-04
Loss = 4.6293e-01, PNorm = 49.9596, GNorm = 2.7460, lr_0 = 7.1325e-04
Loss = 4.8841e-01, PNorm = 49.9722, GNorm = 1.7113, lr_0 = 7.1277e-04
Loss = 4.0651e-01, PNorm = 49.9905, GNorm = 1.7524, lr_0 = 7.1228e-04
Loss = 4.8356e-01, PNorm = 50.0038, GNorm = 1.3215, lr_0 = 7.1179e-04
Loss = 4.5316e-01, PNorm = 50.0213, GNorm = 1.5720, lr_0 = 7.1130e-04
Loss = 5.3216e-01, PNorm = 50.0415, GNorm = 1.2295, lr_0 = 7.1081e-04
Loss = 4.3150e-01, PNorm = 50.0578, GNorm = 1.5247, lr_0 = 7.1033e-04
Loss = 4.3660e-01, PNorm = 50.0713, GNorm = 1.7626, lr_0 = 7.0984e-04
Loss = 5.1165e-01, PNorm = 50.0912, GNorm = 2.8113, lr_0 = 7.0935e-04
Loss = 4.9469e-01, PNorm = 50.1053, GNorm = 1.7609, lr_0 = 7.0887e-04
Loss = 4.6095e-01, PNorm = 50.1294, GNorm = 2.7574, lr_0 = 7.0838e-04
Loss = 4.6999e-01, PNorm = 50.1402, GNorm = 1.8297, lr_0 = 7.0790e-04
Loss = 4.6062e-01, PNorm = 50.1538, GNorm = 1.1510, lr_0 = 7.0741e-04
Loss = 5.0008e-01, PNorm = 50.1686, GNorm = 1.5379, lr_0 = 7.0693e-04
Loss = 4.3184e-01, PNorm = 50.1869, GNorm = 0.7722, lr_0 = 7.0644e-04
Loss = 4.3742e-01, PNorm = 50.2039, GNorm = 1.0025, lr_0 = 7.0596e-04
Loss = 4.1389e-01, PNorm = 50.2229, GNorm = 1.0249, lr_0 = 7.0548e-04
Loss = 5.0608e-01, PNorm = 50.2351, GNorm = 1.0949, lr_0 = 7.0499e-04
Loss = 4.4955e-01, PNorm = 50.2544, GNorm = 1.3314, lr_0 = 7.0451e-04
Loss = 4.8166e-01, PNorm = 50.2761, GNorm = 2.0830, lr_0 = 7.0403e-04
Loss = 4.6067e-01, PNorm = 50.3013, GNorm = 1.8779, lr_0 = 7.0354e-04
Loss = 4.6971e-01, PNorm = 50.3125, GNorm = 1.4574, lr_0 = 7.0306e-04
Loss = 5.5019e-01, PNorm = 50.3237, GNorm = 1.5206, lr_0 = 7.0258e-04
Loss = 4.5329e-01, PNorm = 50.3454, GNorm = 1.5613, lr_0 = 7.0210e-04
Loss = 4.2682e-01, PNorm = 50.3628, GNorm = 1.6050, lr_0 = 7.0162e-04
Loss = 4.5045e-01, PNorm = 50.3767, GNorm = 1.6006, lr_0 = 7.0114e-04
Loss = 4.8718e-01, PNorm = 50.3930, GNorm = 1.3402, lr_0 = 7.0066e-04
Loss = 4.3324e-01, PNorm = 50.4137, GNorm = 1.3075, lr_0 = 7.0018e-04
Loss = 4.7530e-01, PNorm = 50.4263, GNorm = 2.0497, lr_0 = 6.9970e-04
Loss = 4.4630e-01, PNorm = 50.4439, GNorm = 1.1817, lr_0 = 6.9922e-04
Loss = 4.6193e-01, PNorm = 50.4611, GNorm = 2.2197, lr_0 = 6.9874e-04
Loss = 4.4322e-01, PNorm = 50.4757, GNorm = 2.0995, lr_0 = 6.9826e-04
Loss = 4.2212e-01, PNorm = 50.4947, GNorm = 1.5608, lr_0 = 6.9778e-04
Loss = 4.3794e-01, PNorm = 50.5076, GNorm = 1.1412, lr_0 = 6.9730e-04
Loss = 4.4797e-01, PNorm = 50.5180, GNorm = 1.9669, lr_0 = 6.9683e-04
Loss = 4.5820e-01, PNorm = 50.5323, GNorm = 1.3892, lr_0 = 6.9635e-04
Loss = 4.0875e-01, PNorm = 50.5462, GNorm = 1.0132, lr_0 = 6.9587e-04
Loss = 4.0985e-01, PNorm = 50.5579, GNorm = 1.8435, lr_0 = 6.9540e-04
Loss = 4.5033e-01, PNorm = 50.5778, GNorm = 2.4526, lr_0 = 6.9492e-04
Loss = 4.5253e-01, PNorm = 50.5960, GNorm = 1.7384, lr_0 = 6.9444e-04
Loss = 4.2923e-01, PNorm = 50.6233, GNorm = 1.2627, lr_0 = 6.9397e-04
Loss = 3.9287e-01, PNorm = 50.6427, GNorm = 1.5659, lr_0 = 6.9349e-04
Loss = 5.0070e-01, PNorm = 50.6623, GNorm = 1.1568, lr_0 = 6.9302e-04
Loss = 4.4644e-01, PNorm = 50.6831, GNorm = 1.5185, lr_0 = 6.9254e-04
Loss = 5.3738e-01, PNorm = 50.6955, GNorm = 2.3818, lr_0 = 6.9207e-04
Loss = 4.9187e-01, PNorm = 50.7074, GNorm = 1.5911, lr_0 = 6.9159e-04
Loss = 3.7217e-01, PNorm = 50.7197, GNorm = 1.5133, lr_0 = 6.9112e-04
Loss = 4.4907e-01, PNorm = 50.7275, GNorm = 1.3631, lr_0 = 6.9065e-04
Loss = 5.0848e-01, PNorm = 50.7440, GNorm = 2.0365, lr_0 = 6.9017e-04
Loss = 4.7998e-01, PNorm = 50.7540, GNorm = 1.0475, lr_0 = 6.8970e-04
Loss = 4.4726e-01, PNorm = 50.7684, GNorm = 1.2721, lr_0 = 6.8923e-04
Loss = 4.7221e-01, PNorm = 50.7886, GNorm = 2.2989, lr_0 = 6.8876e-04
Loss = 4.8373e-01, PNorm = 50.8010, GNorm = 1.3036, lr_0 = 6.8828e-04
Loss = 3.6427e-01, PNorm = 50.8110, GNorm = 1.3192, lr_0 = 6.8781e-04
Loss = 4.8466e-01, PNorm = 50.8309, GNorm = 1.4854, lr_0 = 6.8734e-04
Loss = 4.8522e-01, PNorm = 50.8406, GNorm = 1.9897, lr_0 = 6.8687e-04
Loss = 4.4307e-01, PNorm = 50.8602, GNorm = 1.3496, lr_0 = 6.8640e-04
Loss = 4.3277e-01, PNorm = 50.8712, GNorm = 2.1028, lr_0 = 6.8593e-04
Loss = 4.1910e-01, PNorm = 50.8875, GNorm = 1.8090, lr_0 = 6.8546e-04
Loss = 4.1406e-01, PNorm = 50.9020, GNorm = 1.1496, lr_0 = 6.8499e-04
Loss = 3.9938e-01, PNorm = 50.9198, GNorm = 1.4014, lr_0 = 6.8452e-04
Loss = 4.3170e-01, PNorm = 50.9346, GNorm = 1.7448, lr_0 = 6.8405e-04
Loss = 4.7380e-01, PNorm = 50.9511, GNorm = 1.4071, lr_0 = 6.8358e-04
Loss = 4.1213e-01, PNorm = 50.9613, GNorm = 1.1400, lr_0 = 6.8312e-04
Loss = 4.6280e-01, PNorm = 50.9799, GNorm = 2.7272, lr_0 = 6.8265e-04
Loss = 4.5210e-01, PNorm = 50.9951, GNorm = 1.2437, lr_0 = 6.8218e-04
Loss = 5.1465e-01, PNorm = 51.0138, GNorm = 1.4921, lr_0 = 6.8171e-04
Loss = 4.8613e-01, PNorm = 51.0355, GNorm = 1.3877, lr_0 = 6.8125e-04
Loss = 4.2838e-01, PNorm = 51.0484, GNorm = 1.0962, lr_0 = 6.8078e-04
Loss = 4.9584e-01, PNorm = 51.0597, GNorm = 1.7072, lr_0 = 6.8031e-04
Loss = 4.9668e-01, PNorm = 51.0723, GNorm = 1.2276, lr_0 = 6.7985e-04
Loss = 3.9600e-01, PNorm = 51.0916, GNorm = 1.4848, lr_0 = 6.7938e-04
Loss = 4.6739e-01, PNorm = 51.1125, GNorm = 1.2017, lr_0 = 6.7892e-04
Loss = 3.8887e-01, PNorm = 51.1231, GNorm = 1.5414, lr_0 = 6.7845e-04
Loss = 4.7962e-01, PNorm = 51.1377, GNorm = 1.4067, lr_0 = 6.7799e-04
Loss = 4.8414e-01, PNorm = 51.1546, GNorm = 1.2521, lr_0 = 6.7752e-04
Loss = 4.8321e-01, PNorm = 51.1699, GNorm = 2.0801, lr_0 = 6.7706e-04
Loss = 4.4403e-01, PNorm = 51.1834, GNorm = 1.6648, lr_0 = 6.7659e-04
Loss = 5.4889e-01, PNorm = 51.1956, GNorm = 1.6729, lr_0 = 6.7613e-04
Loss = 4.5996e-01, PNorm = 51.2173, GNorm = 1.6614, lr_0 = 6.7567e-04
Loss = 4.6407e-01, PNorm = 51.2384, GNorm = 1.6090, lr_0 = 6.7520e-04
Loss = 5.0360e-01, PNorm = 51.2553, GNorm = 1.0753, lr_0 = 6.7474e-04
Loss = 4.6620e-01, PNorm = 51.2807, GNorm = 1.7504, lr_0 = 6.7428e-04
Loss = 4.0959e-01, PNorm = 51.2963, GNorm = 1.0099, lr_0 = 6.7382e-04
Loss = 4.2776e-01, PNorm = 51.3080, GNorm = 1.0148, lr_0 = 6.7335e-04
Loss = 4.7323e-01, PNorm = 51.3242, GNorm = 1.4399, lr_0 = 6.7289e-04
Loss = 5.4427e-01, PNorm = 51.3437, GNorm = 2.6004, lr_0 = 6.7243e-04
Loss = 3.9072e-01, PNorm = 51.3655, GNorm = 1.1932, lr_0 = 6.7197e-04
Loss = 4.1444e-01, PNorm = 51.3833, GNorm = 1.0975, lr_0 = 6.7151e-04
Loss = 4.9018e-01, PNorm = 51.3973, GNorm = 1.7468, lr_0 = 6.7105e-04
Loss = 4.6246e-01, PNorm = 51.4142, GNorm = 1.6351, lr_0 = 6.7059e-04
Loss = 4.0938e-01, PNorm = 51.4303, GNorm = 1.5275, lr_0 = 6.7013e-04
Loss = 5.0419e-01, PNorm = 51.4434, GNorm = 1.2421, lr_0 = 6.6967e-04
Loss = 4.5424e-01, PNorm = 51.4499, GNorm = 1.5423, lr_0 = 6.6921e-04
Loss = 4.9886e-01, PNorm = 51.4569, GNorm = 1.2293, lr_0 = 6.6876e-04
Loss = 4.2892e-01, PNorm = 51.4710, GNorm = 1.8508, lr_0 = 6.6830e-04
Loss = 4.8233e-01, PNorm = 51.4857, GNorm = 1.7168, lr_0 = 6.6784e-04
Loss = 4.5919e-01, PNorm = 51.4926, GNorm = 2.0809, lr_0 = 6.6738e-04
Loss = 4.4610e-01, PNorm = 51.4997, GNorm = 1.2477, lr_0 = 6.6693e-04
Loss = 4.6019e-01, PNorm = 51.5120, GNorm = 1.2896, lr_0 = 6.6647e-04
Loss = 4.4245e-01, PNorm = 51.5201, GNorm = 0.8036, lr_0 = 6.6601e-04
Loss = 4.5798e-01, PNorm = 51.5359, GNorm = 1.5987, lr_0 = 6.6556e-04
Loss = 4.0903e-01, PNorm = 51.5553, GNorm = 1.3429, lr_0 = 6.6510e-04
Loss = 4.5839e-01, PNorm = 51.5750, GNorm = 2.1681, lr_0 = 6.6464e-04
Loss = 3.9750e-01, PNorm = 51.5913, GNorm = 1.2987, lr_0 = 6.6419e-04
Loss = 4.3230e-01, PNorm = 51.6019, GNorm = 1.9067, lr_0 = 6.6373e-04
Loss = 3.9801e-01, PNorm = 51.6148, GNorm = 1.7254, lr_0 = 6.6328e-04
Loss = 4.8655e-01, PNorm = 51.6363, GNorm = 1.3835, lr_0 = 6.6282e-04
Validation mae = 0.117632
Epoch 7
Loss = 4.0514e-01, PNorm = 51.6481, GNorm = 1.6259, lr_0 = 6.6237e-04
Loss = 5.2092e-01, PNorm = 51.6613, GNorm = 1.9955, lr_0 = 6.6192e-04
Loss = 4.0107e-01, PNorm = 51.6720, GNorm = 1.2467, lr_0 = 6.6146e-04
Loss = 4.5739e-01, PNorm = 51.6894, GNorm = 1.2217, lr_0 = 6.6101e-04
Loss = 4.1349e-01, PNorm = 51.7006, GNorm = 3.1929, lr_0 = 6.6056e-04
Loss = 4.3667e-01, PNorm = 51.7089, GNorm = 1.3717, lr_0 = 6.6011e-04
Loss = 4.9948e-01, PNorm = 51.7220, GNorm = 0.9598, lr_0 = 6.5965e-04
Loss = 4.2307e-01, PNorm = 51.7341, GNorm = 0.9973, lr_0 = 6.5920e-04
Loss = 4.8363e-01, PNorm = 51.7483, GNorm = 1.2464, lr_0 = 6.5875e-04
Loss = 4.4446e-01, PNorm = 51.7679, GNorm = 1.3946, lr_0 = 6.5830e-04
Loss = 4.3380e-01, PNorm = 51.7831, GNorm = 1.6621, lr_0 = 6.5785e-04
Loss = 3.9926e-01, PNorm = 51.7893, GNorm = 1.2295, lr_0 = 6.5740e-04
Loss = 4.3600e-01, PNorm = 51.8061, GNorm = 1.4796, lr_0 = 6.5695e-04
Loss = 4.2794e-01, PNorm = 51.8208, GNorm = 1.3319, lr_0 = 6.5650e-04
Loss = 4.0900e-01, PNorm = 51.8327, GNorm = 1.6689, lr_0 = 6.5605e-04
Loss = 5.2538e-01, PNorm = 51.8409, GNorm = 1.6070, lr_0 = 6.5560e-04
Loss = 4.0498e-01, PNorm = 51.8497, GNorm = 1.2898, lr_0 = 6.5515e-04
Loss = 4.4875e-01, PNorm = 51.8637, GNorm = 1.0498, lr_0 = 6.5470e-04
Loss = 4.0405e-01, PNorm = 51.8758, GNorm = 1.0855, lr_0 = 6.5425e-04
Loss = 4.6035e-01, PNorm = 51.8942, GNorm = 1.7348, lr_0 = 6.5380e-04
Loss = 4.3846e-01, PNorm = 51.9107, GNorm = 1.2283, lr_0 = 6.5335e-04
Loss = 4.9532e-01, PNorm = 51.9234, GNorm = 1.7903, lr_0 = 6.5291e-04
Loss = 4.2646e-01, PNorm = 51.9389, GNorm = 2.1970, lr_0 = 6.5246e-04
Loss = 4.8137e-01, PNorm = 51.9496, GNorm = 1.0505, lr_0 = 6.5201e-04
Loss = 4.5772e-01, PNorm = 51.9627, GNorm = 1.6063, lr_0 = 6.5157e-04
Loss = 4.4790e-01, PNorm = 51.9778, GNorm = 1.3634, lr_0 = 6.5112e-04
Loss = 4.9879e-01, PNorm = 51.9999, GNorm = 1.1535, lr_0 = 6.5067e-04
Loss = 4.5451e-01, PNorm = 52.0118, GNorm = 1.0319, lr_0 = 6.5023e-04
Loss = 5.1542e-01, PNorm = 52.0169, GNorm = 2.1074, lr_0 = 6.4978e-04
Loss = 4.5467e-01, PNorm = 52.0297, GNorm = 1.3291, lr_0 = 6.4934e-04
Loss = 4.8337e-01, PNorm = 52.0438, GNorm = 1.0468, lr_0 = 6.4889e-04
Loss = 3.6163e-01, PNorm = 52.0574, GNorm = 1.2923, lr_0 = 6.4845e-04
Loss = 4.0779e-01, PNorm = 52.0710, GNorm = 1.1369, lr_0 = 6.4800e-04
Loss = 3.8261e-01, PNorm = 52.0809, GNorm = 0.7876, lr_0 = 6.4756e-04
Loss = 4.3590e-01, PNorm = 52.0969, GNorm = 2.4561, lr_0 = 6.4712e-04
Loss = 4.5455e-01, PNorm = 52.1118, GNorm = 1.1626, lr_0 = 6.4667e-04
Loss = 4.3322e-01, PNorm = 52.1212, GNorm = 1.8864, lr_0 = 6.4623e-04
Loss = 5.1759e-01, PNorm = 52.1350, GNorm = 1.2227, lr_0 = 6.4579e-04
Loss = 4.5022e-01, PNorm = 52.1575, GNorm = 0.8244, lr_0 = 6.4534e-04
Loss = 4.2379e-01, PNorm = 52.1670, GNorm = 2.0208, lr_0 = 6.4490e-04
Loss = 4.5113e-01, PNorm = 52.1790, GNorm = 1.5412, lr_0 = 6.4446e-04
Loss = 5.2300e-01, PNorm = 52.2010, GNorm = 1.5829, lr_0 = 6.4402e-04
Loss = 4.5318e-01, PNorm = 52.2162, GNorm = 1.4929, lr_0 = 6.4358e-04
Loss = 4.8019e-01, PNorm = 52.2378, GNorm = 1.2251, lr_0 = 6.4314e-04
Loss = 4.4911e-01, PNorm = 52.2515, GNorm = 1.2985, lr_0 = 6.4270e-04
Loss = 4.2445e-01, PNorm = 52.2664, GNorm = 1.7740, lr_0 = 6.4226e-04
Loss = 4.4526e-01, PNorm = 52.2721, GNorm = 1.1549, lr_0 = 6.4182e-04
Loss = 4.0636e-01, PNorm = 52.2920, GNorm = 1.1920, lr_0 = 6.4138e-04
Loss = 4.3134e-01, PNorm = 52.3081, GNorm = 1.0205, lr_0 = 6.4094e-04
Loss = 3.9940e-01, PNorm = 52.3217, GNorm = 0.9002, lr_0 = 6.4050e-04
Loss = 4.2957e-01, PNorm = 52.3390, GNorm = 0.9052, lr_0 = 6.4006e-04
Loss = 4.2278e-01, PNorm = 52.3486, GNorm = 1.1092, lr_0 = 6.3962e-04
Loss = 4.4727e-01, PNorm = 52.3560, GNorm = 2.1545, lr_0 = 6.3918e-04
Loss = 4.3557e-01, PNorm = 52.3661, GNorm = 1.0908, lr_0 = 6.3874e-04
Loss = 4.3777e-01, PNorm = 52.3796, GNorm = 1.5006, lr_0 = 6.3831e-04
Loss = 4.4822e-01, PNorm = 52.3862, GNorm = 1.2617, lr_0 = 6.3787e-04
Loss = 3.9446e-01, PNorm = 52.3972, GNorm = 2.2702, lr_0 = 6.3743e-04
Loss = 4.5424e-01, PNorm = 52.4054, GNorm = 1.0494, lr_0 = 6.3700e-04
Loss = 4.3945e-01, PNorm = 52.4203, GNorm = 1.5655, lr_0 = 6.3656e-04
Loss = 4.1847e-01, PNorm = 52.4360, GNorm = 1.4382, lr_0 = 6.3612e-04
Loss = 4.4429e-01, PNorm = 52.4518, GNorm = 1.3804, lr_0 = 6.3569e-04
Loss = 5.2275e-01, PNorm = 52.4686, GNorm = 1.2122, lr_0 = 6.3525e-04
Loss = 4.3733e-01, PNorm = 52.4829, GNorm = 1.1909, lr_0 = 6.3482e-04
Loss = 4.3798e-01, PNorm = 52.4986, GNorm = 2.3393, lr_0 = 6.3438e-04
Loss = 4.4258e-01, PNorm = 52.5107, GNorm = 2.4085, lr_0 = 6.3395e-04
Loss = 4.2223e-01, PNorm = 52.5215, GNorm = 2.3220, lr_0 = 6.3351e-04
Loss = 3.8680e-01, PNorm = 52.5312, GNorm = 1.3666, lr_0 = 6.3308e-04
Loss = 4.2512e-01, PNorm = 52.5484, GNorm = 1.4087, lr_0 = 6.3265e-04
Loss = 4.5742e-01, PNorm = 52.5636, GNorm = 1.3792, lr_0 = 6.3221e-04
Loss = 5.0799e-01, PNorm = 52.5798, GNorm = 1.4311, lr_0 = 6.3178e-04
Loss = 4.9093e-01, PNorm = 52.5961, GNorm = 1.9069, lr_0 = 6.3135e-04
Loss = 5.0553e-01, PNorm = 52.6176, GNorm = 2.1364, lr_0 = 6.3091e-04
Loss = 4.0696e-01, PNorm = 52.6280, GNorm = 1.0395, lr_0 = 6.3048e-04
Loss = 4.6297e-01, PNorm = 52.6447, GNorm = 1.5717, lr_0 = 6.3005e-04
Loss = 4.7122e-01, PNorm = 52.6616, GNorm = 1.5763, lr_0 = 6.2962e-04
Loss = 3.8169e-01, PNorm = 52.6742, GNorm = 1.1641, lr_0 = 6.2919e-04
Loss = 4.2426e-01, PNorm = 52.6921, GNorm = 1.1655, lr_0 = 6.2876e-04
Loss = 4.3844e-01, PNorm = 52.7101, GNorm = 1.9267, lr_0 = 6.2833e-04
Loss = 5.0267e-01, PNorm = 52.7298, GNorm = 1.8133, lr_0 = 6.2789e-04
Loss = 4.2673e-01, PNorm = 52.7472, GNorm = 0.9994, lr_0 = 6.2746e-04
Loss = 4.4184e-01, PNorm = 52.7636, GNorm = 0.8776, lr_0 = 6.2703e-04
Loss = 4.3512e-01, PNorm = 52.7703, GNorm = 1.1057, lr_0 = 6.2661e-04
Loss = 4.4984e-01, PNorm = 52.7795, GNorm = 1.5942, lr_0 = 6.2618e-04
Loss = 4.0050e-01, PNorm = 52.7874, GNorm = 1.1354, lr_0 = 6.2575e-04
Loss = 4.2650e-01, PNorm = 52.7955, GNorm = 1.3958, lr_0 = 6.2532e-04
Loss = 4.2486e-01, PNorm = 52.8075, GNorm = 1.4345, lr_0 = 6.2489e-04
Loss = 5.3238e-01, PNorm = 52.8203, GNorm = 2.6578, lr_0 = 6.2446e-04
Loss = 4.6499e-01, PNorm = 52.8357, GNorm = 1.2656, lr_0 = 6.2403e-04
Loss = 4.8312e-01, PNorm = 52.8475, GNorm = 1.5259, lr_0 = 6.2361e-04
Loss = 4.1840e-01, PNorm = 52.8615, GNorm = 1.2509, lr_0 = 6.2318e-04
Loss = 4.5370e-01, PNorm = 52.8757, GNorm = 1.0754, lr_0 = 6.2275e-04
Loss = 3.9211e-01, PNorm = 52.8906, GNorm = 1.1496, lr_0 = 6.2233e-04
Loss = 3.9674e-01, PNorm = 52.9053, GNorm = 1.2994, lr_0 = 6.2190e-04
Loss = 5.0119e-01, PNorm = 52.9199, GNorm = 1.0192, lr_0 = 6.2147e-04
Loss = 4.4448e-01, PNorm = 52.9333, GNorm = 1.5088, lr_0 = 6.2105e-04
Loss = 4.3627e-01, PNorm = 52.9483, GNorm = 1.4235, lr_0 = 6.2062e-04
Loss = 4.2886e-01, PNorm = 52.9530, GNorm = 0.9883, lr_0 = 6.2020e-04
Loss = 4.5893e-01, PNorm = 52.9685, GNorm = 1.3395, lr_0 = 6.1977e-04
Loss = 4.1290e-01, PNorm = 52.9787, GNorm = 1.3585, lr_0 = 6.1935e-04
Loss = 4.0779e-01, PNorm = 53.0004, GNorm = 1.2455, lr_0 = 6.1892e-04
Loss = 4.5281e-01, PNorm = 53.0099, GNorm = 1.5231, lr_0 = 6.1850e-04
Loss = 4.3822e-01, PNorm = 53.0206, GNorm = 1.5774, lr_0 = 6.1808e-04
Loss = 5.2742e-01, PNorm = 53.0358, GNorm = 1.1554, lr_0 = 6.1765e-04
Loss = 4.5167e-01, PNorm = 53.0525, GNorm = 1.0098, lr_0 = 6.1723e-04
Loss = 3.6749e-01, PNorm = 53.0618, GNorm = 2.2119, lr_0 = 6.1681e-04
Loss = 4.3283e-01, PNorm = 53.0713, GNorm = 1.5230, lr_0 = 6.1638e-04
Loss = 4.4937e-01, PNorm = 53.0837, GNorm = 1.4770, lr_0 = 6.1596e-04
Loss = 4.1590e-01, PNorm = 53.0994, GNorm = 1.2837, lr_0 = 6.1554e-04
Loss = 4.2926e-01, PNorm = 53.1092, GNorm = 1.2709, lr_0 = 6.1512e-04
Loss = 4.8696e-01, PNorm = 53.1228, GNorm = 2.3306, lr_0 = 6.1470e-04
Loss = 4.4093e-01, PNorm = 53.1361, GNorm = 1.1096, lr_0 = 6.1428e-04
Loss = 4.7558e-01, PNorm = 53.1591, GNorm = 1.7472, lr_0 = 6.1385e-04
Loss = 4.6616e-01, PNorm = 53.1757, GNorm = 1.5135, lr_0 = 6.1343e-04
Loss = 4.5950e-01, PNorm = 53.1882, GNorm = 1.4086, lr_0 = 6.1301e-04
Loss = 4.4108e-01, PNorm = 53.1966, GNorm = 1.8552, lr_0 = 6.1259e-04
Loss = 4.2084e-01, PNorm = 53.2079, GNorm = 0.8070, lr_0 = 6.1217e-04
Loss = 4.4728e-01, PNorm = 53.2169, GNorm = 1.9657, lr_0 = 6.1175e-04
Loss = 4.3351e-01, PNorm = 53.2264, GNorm = 1.5992, lr_0 = 6.1134e-04
Loss = 4.0352e-01, PNorm = 53.2407, GNorm = 1.4858, lr_0 = 6.1092e-04
Loss = 3.7792e-01, PNorm = 53.2556, GNorm = 2.0691, lr_0 = 6.1050e-04
Validation mae = 0.117173
Epoch 8
Loss = 4.0405e-01, PNorm = 53.2659, GNorm = 1.3393, lr_0 = 6.1008e-04
Loss = 5.0273e-01, PNorm = 53.2735, GNorm = 1.9010, lr_0 = 6.0966e-04
Loss = 4.3344e-01, PNorm = 53.2911, GNorm = 1.1529, lr_0 = 6.0924e-04
Loss = 4.1343e-01, PNorm = 53.3085, GNorm = 1.6035, lr_0 = 6.0883e-04
Loss = 4.1658e-01, PNorm = 53.3204, GNorm = 1.2907, lr_0 = 6.0841e-04
Loss = 3.6979e-01, PNorm = 53.3330, GNorm = 0.8360, lr_0 = 6.0799e-04
Loss = 4.1013e-01, PNorm = 53.3488, GNorm = 0.9261, lr_0 = 6.0758e-04
Loss = 4.5679e-01, PNorm = 53.3521, GNorm = 2.1928, lr_0 = 6.0716e-04
Loss = 4.6995e-01, PNorm = 53.3643, GNorm = 1.0159, lr_0 = 6.0674e-04
Loss = 4.4466e-01, PNorm = 53.3774, GNorm = 1.2686, lr_0 = 6.0633e-04
Loss = 4.0272e-01, PNorm = 53.3948, GNorm = 1.8538, lr_0 = 6.0591e-04
Loss = 4.6814e-01, PNorm = 53.4086, GNorm = 1.8302, lr_0 = 6.0550e-04
Loss = 4.8993e-01, PNorm = 53.4270, GNorm = 2.1114, lr_0 = 6.0508e-04
Loss = 4.2349e-01, PNorm = 53.4444, GNorm = 1.5363, lr_0 = 6.0467e-04
Loss = 4.5158e-01, PNorm = 53.4596, GNorm = 2.5654, lr_0 = 6.0425e-04
Loss = 4.0123e-01, PNorm = 53.4795, GNorm = 1.5841, lr_0 = 6.0384e-04
Loss = 4.4706e-01, PNorm = 53.4903, GNorm = 1.2166, lr_0 = 6.0343e-04
Loss = 4.3141e-01, PNorm = 53.5107, GNorm = 1.3294, lr_0 = 6.0301e-04
Loss = 4.5323e-01, PNorm = 53.5257, GNorm = 1.2317, lr_0 = 6.0260e-04
Loss = 3.9658e-01, PNorm = 53.5315, GNorm = 1.3857, lr_0 = 6.0219e-04
Loss = 4.1013e-01, PNorm = 53.5475, GNorm = 1.7105, lr_0 = 6.0178e-04
Loss = 4.9955e-01, PNorm = 53.5591, GNorm = 1.5735, lr_0 = 6.0136e-04
Loss = 3.7321e-01, PNorm = 53.5718, GNorm = 2.0679, lr_0 = 6.0095e-04
Loss = 5.5815e-01, PNorm = 53.5892, GNorm = 1.4760, lr_0 = 6.0054e-04
Loss = 4.5145e-01, PNorm = 53.6086, GNorm = 2.1611, lr_0 = 6.0013e-04
Loss = 4.3873e-01, PNorm = 53.6235, GNorm = 1.6046, lr_0 = 5.9972e-04
Loss = 4.7497e-01, PNorm = 53.6340, GNorm = 1.8339, lr_0 = 5.9931e-04
Loss = 4.2700e-01, PNorm = 53.6473, GNorm = 1.4069, lr_0 = 5.9890e-04
Loss = 4.5241e-01, PNorm = 53.6622, GNorm = 2.1748, lr_0 = 5.9849e-04
Loss = 4.1311e-01, PNorm = 53.6667, GNorm = 1.6815, lr_0 = 5.9808e-04
Loss = 4.0621e-01, PNorm = 53.6839, GNorm = 3.0756, lr_0 = 5.9767e-04
Loss = 4.4016e-01, PNorm = 53.6946, GNorm = 1.1080, lr_0 = 5.9726e-04
Loss = 4.0859e-01, PNorm = 53.7041, GNorm = 1.8568, lr_0 = 5.9685e-04
Loss = 3.7594e-01, PNorm = 53.7169, GNorm = 1.5793, lr_0 = 5.9644e-04
Loss = 4.1767e-01, PNorm = 53.7242, GNorm = 1.2266, lr_0 = 5.9603e-04
Loss = 4.4008e-01, PNorm = 53.7362, GNorm = 1.5305, lr_0 = 5.9562e-04
Loss = 4.5964e-01, PNorm = 53.7455, GNorm = 1.3016, lr_0 = 5.9521e-04
Loss = 4.0347e-01, PNorm = 53.7567, GNorm = 0.9232, lr_0 = 5.9481e-04
Loss = 4.0929e-01, PNorm = 53.7691, GNorm = 1.3680, lr_0 = 5.9440e-04
Loss = 4.7438e-01, PNorm = 53.7787, GNorm = 1.4180, lr_0 = 5.9399e-04
Loss = 4.3807e-01, PNorm = 53.7874, GNorm = 1.0518, lr_0 = 5.9358e-04
Loss = 4.2220e-01, PNorm = 53.7974, GNorm = 1.4451, lr_0 = 5.9318e-04
Loss = 4.2198e-01, PNorm = 53.8090, GNorm = 1.6188, lr_0 = 5.9277e-04
Loss = 4.6602e-01, PNorm = 53.8242, GNorm = 1.7155, lr_0 = 5.9236e-04
Loss = 4.1861e-01, PNorm = 53.8360, GNorm = 1.4644, lr_0 = 5.9196e-04
Loss = 4.6663e-01, PNorm = 53.8491, GNorm = 1.3637, lr_0 = 5.9155e-04
Loss = 4.7554e-01, PNorm = 53.8647, GNorm = 1.3627, lr_0 = 5.9115e-04
Loss = 4.5693e-01, PNorm = 53.8748, GNorm = 1.1353, lr_0 = 5.9074e-04
Loss = 4.9187e-01, PNorm = 53.8915, GNorm = 1.1795, lr_0 = 5.9034e-04
Loss = 4.8361e-01, PNorm = 53.9045, GNorm = 1.9107, lr_0 = 5.8993e-04
Loss = 4.1918e-01, PNorm = 53.9176, GNorm = 1.0927, lr_0 = 5.8953e-04
Loss = 4.5549e-01, PNorm = 53.9300, GNorm = 1.3830, lr_0 = 5.8913e-04
Loss = 4.4435e-01, PNorm = 53.9404, GNorm = 1.2711, lr_0 = 5.8872e-04
Loss = 4.4656e-01, PNorm = 53.9479, GNorm = 1.1499, lr_0 = 5.8832e-04
Loss = 4.4274e-01, PNorm = 53.9581, GNorm = 1.0871, lr_0 = 5.8792e-04
Loss = 4.0438e-01, PNorm = 53.9696, GNorm = 1.1647, lr_0 = 5.8751e-04
Loss = 4.0696e-01, PNorm = 53.9818, GNorm = 1.1880, lr_0 = 5.8711e-04
Loss = 4.3809e-01, PNorm = 53.9923, GNorm = 1.5459, lr_0 = 5.8671e-04
Loss = 4.2446e-01, PNorm = 54.0041, GNorm = 1.1591, lr_0 = 5.8631e-04
Loss = 4.5350e-01, PNorm = 54.0213, GNorm = 2.0484, lr_0 = 5.8591e-04
Loss = 4.5121e-01, PNorm = 54.0350, GNorm = 2.1484, lr_0 = 5.8550e-04
Loss = 4.7461e-01, PNorm = 54.0526, GNorm = 1.2778, lr_0 = 5.8510e-04
Loss = 4.4388e-01, PNorm = 54.0707, GNorm = 1.4972, lr_0 = 5.8470e-04
Loss = 4.4545e-01, PNorm = 54.0801, GNorm = 1.4437, lr_0 = 5.8430e-04
Loss = 4.3647e-01, PNorm = 54.0897, GNorm = 0.9735, lr_0 = 5.8390e-04
Loss = 4.6807e-01, PNorm = 54.1030, GNorm = 1.1651, lr_0 = 5.8350e-04
Loss = 4.3666e-01, PNorm = 54.1156, GNorm = 1.8030, lr_0 = 5.8310e-04
Loss = 4.2307e-01, PNorm = 54.1255, GNorm = 1.2661, lr_0 = 5.8270e-04
Loss = 3.7859e-01, PNorm = 54.1374, GNorm = 1.3279, lr_0 = 5.8230e-04
Loss = 4.3958e-01, PNorm = 54.1507, GNorm = 2.5380, lr_0 = 5.8190e-04
Loss = 4.0787e-01, PNorm = 54.1602, GNorm = 1.6753, lr_0 = 5.8151e-04
Loss = 4.0968e-01, PNorm = 54.1721, GNorm = 1.2252, lr_0 = 5.8111e-04
Loss = 4.3291e-01, PNorm = 54.1828, GNorm = 1.1636, lr_0 = 5.8071e-04
Loss = 4.2365e-01, PNorm = 54.1898, GNorm = 0.9782, lr_0 = 5.8031e-04
Loss = 4.1704e-01, PNorm = 54.2010, GNorm = 1.5903, lr_0 = 5.7991e-04
Loss = 4.2325e-01, PNorm = 54.2165, GNorm = 1.2175, lr_0 = 5.7952e-04
Loss = 4.1706e-01, PNorm = 54.2280, GNorm = 1.1964, lr_0 = 5.7912e-04
Loss = 4.5391e-01, PNorm = 54.2374, GNorm = 1.9048, lr_0 = 5.7872e-04
Loss = 4.6986e-01, PNorm = 54.2440, GNorm = 1.5563, lr_0 = 5.7833e-04
Loss = 4.1395e-01, PNorm = 54.2568, GNorm = 1.1331, lr_0 = 5.7793e-04
Loss = 3.8461e-01, PNorm = 54.2663, GNorm = 1.5344, lr_0 = 5.7753e-04
Loss = 4.1677e-01, PNorm = 54.2834, GNorm = 1.3221, lr_0 = 5.7714e-04
Loss = 4.9589e-01, PNorm = 54.3001, GNorm = 0.9341, lr_0 = 5.7674e-04
Loss = 4.4184e-01, PNorm = 54.3142, GNorm = 2.1339, lr_0 = 5.7635e-04
Loss = 4.2708e-01, PNorm = 54.3189, GNorm = 1.1433, lr_0 = 5.7595e-04
Loss = 3.8728e-01, PNorm = 54.3257, GNorm = 1.4080, lr_0 = 5.7556e-04
Loss = 4.4690e-01, PNorm = 54.3344, GNorm = 1.5497, lr_0 = 5.7516e-04
Loss = 4.5549e-01, PNorm = 54.3432, GNorm = 1.3146, lr_0 = 5.7477e-04
Loss = 4.5303e-01, PNorm = 54.3590, GNorm = 1.3175, lr_0 = 5.7438e-04
Loss = 4.6073e-01, PNorm = 54.3689, GNorm = 2.0993, lr_0 = 5.7398e-04
Loss = 4.2658e-01, PNorm = 54.3832, GNorm = 1.2099, lr_0 = 5.7359e-04
Loss = 4.6155e-01, PNorm = 54.3982, GNorm = 2.2491, lr_0 = 5.7320e-04
Loss = 4.3381e-01, PNorm = 54.4070, GNorm = 1.9774, lr_0 = 5.7280e-04
Loss = 4.0315e-01, PNorm = 54.4135, GNorm = 0.9788, lr_0 = 5.7241e-04
Loss = 4.0915e-01, PNorm = 54.4210, GNorm = 1.1873, lr_0 = 5.7202e-04
Loss = 3.6729e-01, PNorm = 54.4342, GNorm = 1.0274, lr_0 = 5.7163e-04
Loss = 4.5690e-01, PNorm = 54.4433, GNorm = 1.3515, lr_0 = 5.7124e-04
Loss = 4.3032e-01, PNorm = 54.4549, GNorm = 1.5311, lr_0 = 5.7084e-04
Loss = 3.9564e-01, PNorm = 54.4666, GNorm = 3.0522, lr_0 = 5.7045e-04
Loss = 4.2736e-01, PNorm = 54.4779, GNorm = 1.4679, lr_0 = 5.7006e-04
Loss = 4.7189e-01, PNorm = 54.4859, GNorm = 1.7848, lr_0 = 5.6967e-04
Loss = 4.0409e-01, PNorm = 54.4961, GNorm = 1.5693, lr_0 = 5.6928e-04
Loss = 4.1692e-01, PNorm = 54.5133, GNorm = 1.4440, lr_0 = 5.6889e-04
Loss = 4.5828e-01, PNorm = 54.5220, GNorm = 2.2692, lr_0 = 5.6850e-04
Loss = 4.5357e-01, PNorm = 54.5299, GNorm = 0.9539, lr_0 = 5.6811e-04
Loss = 4.0432e-01, PNorm = 54.5431, GNorm = 0.9052, lr_0 = 5.6772e-04
Loss = 4.2092e-01, PNorm = 54.5530, GNorm = 1.1272, lr_0 = 5.6733e-04
Loss = 4.3555e-01, PNorm = 54.5658, GNorm = 1.2678, lr_0 = 5.6695e-04
Loss = 3.6950e-01, PNorm = 54.5804, GNorm = 0.7679, lr_0 = 5.6656e-04
Loss = 3.6449e-01, PNorm = 54.5852, GNorm = 1.3574, lr_0 = 5.6617e-04
Loss = 4.1681e-01, PNorm = 54.5950, GNorm = 0.9708, lr_0 = 5.6578e-04
Loss = 4.6637e-01, PNorm = 54.6047, GNorm = 1.0321, lr_0 = 5.6539e-04
Loss = 4.7551e-01, PNorm = 54.6173, GNorm = 1.5250, lr_0 = 5.6501e-04
Loss = 4.0301e-01, PNorm = 54.6275, GNorm = 0.9723, lr_0 = 5.6462e-04
Loss = 3.9900e-01, PNorm = 54.6381, GNorm = 0.9937, lr_0 = 5.6423e-04
Loss = 4.1762e-01, PNorm = 54.6427, GNorm = 1.3759, lr_0 = 5.6385e-04
Loss = 4.1008e-01, PNorm = 54.6506, GNorm = 1.3925, lr_0 = 5.6346e-04
Loss = 5.0365e-01, PNorm = 54.6638, GNorm = 2.4972, lr_0 = 5.6307e-04
Loss = 4.8595e-01, PNorm = 54.6727, GNorm = 1.5892, lr_0 = 5.6269e-04
Loss = 4.0559e-01, PNorm = 54.6823, GNorm = 1.0352, lr_0 = 5.6230e-04
Validation mae = 0.116816
Epoch 9
Loss = 3.5888e-01, PNorm = 54.6881, GNorm = 1.0199, lr_0 = 5.6192e-04
Loss = 3.9484e-01, PNorm = 54.7008, GNorm = 1.3110, lr_0 = 5.6153e-04
Loss = 3.9862e-01, PNorm = 54.7122, GNorm = 1.0641, lr_0 = 5.6115e-04
Loss = 3.7095e-01, PNorm = 54.7192, GNorm = 1.0653, lr_0 = 5.6076e-04
Loss = 4.0485e-01, PNorm = 54.7290, GNorm = 1.4809, lr_0 = 5.6038e-04
Loss = 4.0023e-01, PNorm = 54.7407, GNorm = 1.3272, lr_0 = 5.6000e-04
Loss = 4.2865e-01, PNorm = 54.7522, GNorm = 1.2047, lr_0 = 5.5961e-04
Loss = 3.9183e-01, PNorm = 54.7588, GNorm = 1.5828, lr_0 = 5.5923e-04
Loss = 4.4343e-01, PNorm = 54.7669, GNorm = 0.8909, lr_0 = 5.5885e-04
Loss = 4.8452e-01, PNorm = 54.7741, GNorm = 1.2994, lr_0 = 5.5846e-04
Loss = 4.4455e-01, PNorm = 54.7804, GNorm = 1.4995, lr_0 = 5.5808e-04
Loss = 4.5130e-01, PNorm = 54.7930, GNorm = 1.7490, lr_0 = 5.5770e-04
Loss = 3.9554e-01, PNorm = 54.8071, GNorm = 1.0835, lr_0 = 5.5732e-04
Loss = 4.6822e-01, PNorm = 54.8196, GNorm = 2.0629, lr_0 = 5.5693e-04
Loss = 4.3027e-01, PNorm = 54.8284, GNorm = 1.0273, lr_0 = 5.5655e-04
Loss = 4.5439e-01, PNorm = 54.8416, GNorm = 1.2944, lr_0 = 5.5617e-04
Loss = 4.1548e-01, PNorm = 54.8550, GNorm = 1.1192, lr_0 = 5.5579e-04
Loss = 4.3769e-01, PNorm = 54.8647, GNorm = 1.4486, lr_0 = 5.5541e-04
Loss = 4.2272e-01, PNorm = 54.8846, GNorm = 2.7239, lr_0 = 5.5503e-04
Loss = 4.1493e-01, PNorm = 54.8952, GNorm = 1.6338, lr_0 = 5.5465e-04
Loss = 4.0217e-01, PNorm = 54.9065, GNorm = 1.5160, lr_0 = 5.5427e-04
Loss = 3.8027e-01, PNorm = 54.9160, GNorm = 1.0729, lr_0 = 5.5389e-04
Loss = 4.2973e-01, PNorm = 54.9227, GNorm = 1.8277, lr_0 = 5.5351e-04
Loss = 4.0430e-01, PNorm = 54.9319, GNorm = 1.4878, lr_0 = 5.5313e-04
Loss = 3.6872e-01, PNorm = 54.9402, GNorm = 1.5232, lr_0 = 5.5275e-04
Loss = 4.4665e-01, PNorm = 54.9532, GNorm = 1.6107, lr_0 = 5.5237e-04
Loss = 4.3544e-01, PNorm = 54.9698, GNorm = 1.7539, lr_0 = 5.5199e-04
Loss = 3.8308e-01, PNorm = 54.9833, GNorm = 1.0707, lr_0 = 5.5162e-04
Loss = 4.2022e-01, PNorm = 54.9894, GNorm = 1.0331, lr_0 = 5.5124e-04
Loss = 4.2383e-01, PNorm = 54.9978, GNorm = 0.9970, lr_0 = 5.5086e-04
Loss = 4.6013e-01, PNorm = 55.0069, GNorm = 1.6432, lr_0 = 5.5048e-04
Loss = 3.8036e-01, PNorm = 55.0252, GNorm = 1.1468, lr_0 = 5.5011e-04
Loss = 4.2795e-01, PNorm = 55.0333, GNorm = 1.2284, lr_0 = 5.4973e-04
Loss = 3.6692e-01, PNorm = 55.0446, GNorm = 1.8322, lr_0 = 5.4935e-04
Loss = 4.2996e-01, PNorm = 55.0536, GNorm = 1.4001, lr_0 = 5.4898e-04
Loss = 4.6042e-01, PNorm = 55.0653, GNorm = 1.3732, lr_0 = 5.4860e-04
Loss = 3.9101e-01, PNorm = 55.0807, GNorm = 1.0724, lr_0 = 5.4822e-04
Loss = 3.4218e-01, PNorm = 55.0920, GNorm = 0.9382, lr_0 = 5.4785e-04
Loss = 4.4156e-01, PNorm = 55.1015, GNorm = 1.4647, lr_0 = 5.4747e-04
Loss = 4.6261e-01, PNorm = 55.1133, GNorm = 1.7249, lr_0 = 5.4710e-04
Loss = 4.2878e-01, PNorm = 55.1290, GNorm = 0.8321, lr_0 = 5.4672e-04
Loss = 4.5097e-01, PNorm = 55.1407, GNorm = 1.3853, lr_0 = 5.4635e-04
Loss = 4.1005e-01, PNorm = 55.1541, GNorm = 1.5558, lr_0 = 5.4597e-04
Loss = 4.3700e-01, PNorm = 55.1602, GNorm = 1.3341, lr_0 = 5.4560e-04
Loss = 4.2644e-01, PNorm = 55.1663, GNorm = 1.1390, lr_0 = 5.4523e-04
Loss = 4.5659e-01, PNorm = 55.1765, GNorm = 3.0535, lr_0 = 5.4485e-04
Loss = 4.1356e-01, PNorm = 55.1910, GNorm = 2.1115, lr_0 = 5.4448e-04
Loss = 4.8173e-01, PNorm = 55.2059, GNorm = 1.2377, lr_0 = 5.4411e-04
Loss = 4.2884e-01, PNorm = 55.2203, GNorm = 1.8128, lr_0 = 5.4373e-04
Loss = 4.1271e-01, PNorm = 55.2301, GNorm = 1.0145, lr_0 = 5.4336e-04
Loss = 4.3111e-01, PNorm = 55.2406, GNorm = 1.2199, lr_0 = 5.4299e-04
Loss = 4.9191e-01, PNorm = 55.2520, GNorm = 1.0599, lr_0 = 5.4262e-04
Loss = 4.6658e-01, PNorm = 55.2579, GNorm = 1.2387, lr_0 = 5.4225e-04
Loss = 4.4041e-01, PNorm = 55.2656, GNorm = 1.2774, lr_0 = 5.4187e-04
Loss = 4.3533e-01, PNorm = 55.2776, GNorm = 1.0046, lr_0 = 5.4150e-04
Loss = 5.1776e-01, PNorm = 55.2864, GNorm = 1.2245, lr_0 = 5.4113e-04
Loss = 4.2290e-01, PNorm = 55.2960, GNorm = 1.3553, lr_0 = 5.4076e-04
Loss = 4.5154e-01, PNorm = 55.3065, GNorm = 1.2729, lr_0 = 5.4039e-04
Loss = 4.4453e-01, PNorm = 55.3115, GNorm = 1.8770, lr_0 = 5.4002e-04
Loss = 4.4877e-01, PNorm = 55.3118, GNorm = 1.3631, lr_0 = 5.3965e-04
Loss = 4.2877e-01, PNorm = 55.3236, GNorm = 1.4236, lr_0 = 5.3928e-04
Loss = 4.3292e-01, PNorm = 55.3334, GNorm = 1.3419, lr_0 = 5.3891e-04
Loss = 4.1372e-01, PNorm = 55.3457, GNorm = 1.2971, lr_0 = 5.3854e-04
Loss = 4.3058e-01, PNorm = 55.3482, GNorm = 1.1302, lr_0 = 5.3817e-04
Loss = 4.5105e-01, PNorm = 55.3606, GNorm = 1.2533, lr_0 = 5.3781e-04
Loss = 4.4569e-01, PNorm = 55.3703, GNorm = 2.1573, lr_0 = 5.3744e-04
Loss = 4.4709e-01, PNorm = 55.3776, GNorm = 1.4802, lr_0 = 5.3707e-04
Loss = 4.3996e-01, PNorm = 55.3872, GNorm = 1.8003, lr_0 = 5.3670e-04
Loss = 4.0808e-01, PNorm = 55.3982, GNorm = 1.4052, lr_0 = 5.3633e-04
Loss = 4.5727e-01, PNorm = 55.4064, GNorm = 2.1165, lr_0 = 5.3597e-04
Loss = 4.2333e-01, PNorm = 55.4173, GNorm = 1.9032, lr_0 = 5.3560e-04
Loss = 4.3742e-01, PNorm = 55.4290, GNorm = 1.6131, lr_0 = 5.3523e-04
Loss = 4.5509e-01, PNorm = 55.4435, GNorm = 1.0568, lr_0 = 5.3486e-04
Loss = 4.0046e-01, PNorm = 55.4561, GNorm = 1.5925, lr_0 = 5.3450e-04
Loss = 4.5800e-01, PNorm = 55.4678, GNorm = 1.2032, lr_0 = 5.3413e-04
Loss = 4.8793e-01, PNorm = 55.4748, GNorm = 1.4587, lr_0 = 5.3377e-04
Loss = 3.7927e-01, PNorm = 55.4841, GNorm = 1.8242, lr_0 = 5.3340e-04
Loss = 4.4675e-01, PNorm = 55.4978, GNorm = 1.2358, lr_0 = 5.3304e-04
Loss = 4.5497e-01, PNorm = 55.5094, GNorm = 1.0153, lr_0 = 5.3267e-04
Loss = 4.4973e-01, PNorm = 55.5204, GNorm = 2.3671, lr_0 = 5.3231e-04
Loss = 3.4687e-01, PNorm = 55.5283, GNorm = 1.3568, lr_0 = 5.3194e-04
Loss = 4.7863e-01, PNorm = 55.5384, GNorm = 1.8995, lr_0 = 5.3158e-04
Loss = 3.9364e-01, PNorm = 55.5522, GNorm = 1.5847, lr_0 = 5.3121e-04
Loss = 4.0577e-01, PNorm = 55.5597, GNorm = 1.0717, lr_0 = 5.3085e-04
Loss = 3.8055e-01, PNorm = 55.5695, GNorm = 1.3257, lr_0 = 5.3048e-04
Loss = 4.4354e-01, PNorm = 55.5783, GNorm = 1.6988, lr_0 = 5.3012e-04
Loss = 4.3600e-01, PNorm = 55.5869, GNorm = 1.1555, lr_0 = 5.2976e-04
Loss = 4.3974e-01, PNorm = 55.5946, GNorm = 1.7872, lr_0 = 5.2939e-04
Loss = 4.1935e-01, PNorm = 55.5957, GNorm = 2.1047, lr_0 = 5.2903e-04
Loss = 4.8063e-01, PNorm = 55.6060, GNorm = 1.7056, lr_0 = 5.2867e-04
Loss = 4.5515e-01, PNorm = 55.6165, GNorm = 1.5923, lr_0 = 5.2831e-04
Loss = 3.7359e-01, PNorm = 55.6293, GNorm = 1.0163, lr_0 = 5.2795e-04
Loss = 4.7252e-01, PNorm = 55.6360, GNorm = 1.2143, lr_0 = 5.2758e-04
Loss = 4.6042e-01, PNorm = 55.6486, GNorm = 2.8293, lr_0 = 5.2722e-04
Loss = 3.6763e-01, PNorm = 55.6558, GNorm = 1.2280, lr_0 = 5.2686e-04
Loss = 4.6872e-01, PNorm = 55.6628, GNorm = 1.0478, lr_0 = 5.2650e-04
Loss = 4.8790e-01, PNorm = 55.6725, GNorm = 1.6052, lr_0 = 5.2614e-04
Loss = 4.3742e-01, PNorm = 55.6823, GNorm = 1.1324, lr_0 = 5.2578e-04
Loss = 4.4693e-01, PNorm = 55.6932, GNorm = 0.9924, lr_0 = 5.2542e-04
Loss = 4.3863e-01, PNorm = 55.7008, GNorm = 1.3608, lr_0 = 5.2506e-04
Loss = 4.0231e-01, PNorm = 55.7151, GNorm = 1.2248, lr_0 = 5.2470e-04
Loss = 4.0782e-01, PNorm = 55.7243, GNorm = 1.2803, lr_0 = 5.2434e-04
Loss = 4.1886e-01, PNorm = 55.7382, GNorm = 1.3895, lr_0 = 5.2398e-04
Loss = 3.7822e-01, PNorm = 55.7468, GNorm = 1.6382, lr_0 = 5.2362e-04
Loss = 3.5722e-01, PNorm = 55.7562, GNorm = 1.5055, lr_0 = 5.2326e-04
Loss = 3.9628e-01, PNorm = 55.7628, GNorm = 1.1249, lr_0 = 5.2290e-04
Loss = 3.9593e-01, PNorm = 55.7769, GNorm = 1.8174, lr_0 = 5.2255e-04
Loss = 4.2988e-01, PNorm = 55.7851, GNorm = 1.1847, lr_0 = 5.2219e-04
Loss = 3.9871e-01, PNorm = 55.7982, GNorm = 1.4062, lr_0 = 5.2183e-04
Loss = 3.9281e-01, PNorm = 55.8088, GNorm = 1.1014, lr_0 = 5.2147e-04
Loss = 3.9505e-01, PNorm = 55.8138, GNorm = 1.2260, lr_0 = 5.2112e-04
Loss = 3.7947e-01, PNorm = 55.8232, GNorm = 1.1251, lr_0 = 5.2076e-04
Loss = 4.8289e-01, PNorm = 55.8349, GNorm = 1.1781, lr_0 = 5.2040e-04
Loss = 4.9040e-01, PNorm = 55.8478, GNorm = 1.4975, lr_0 = 5.2005e-04
Loss = 4.1619e-01, PNorm = 55.8629, GNorm = 1.1353, lr_0 = 5.1969e-04
Loss = 3.7243e-01, PNorm = 55.8745, GNorm = 1.4878, lr_0 = 5.1933e-04
Loss = 4.3305e-01, PNorm = 55.8834, GNorm = 1.7372, lr_0 = 5.1898e-04
Loss = 3.9939e-01, PNorm = 55.8987, GNorm = 0.9379, lr_0 = 5.1862e-04
Loss = 3.9325e-01, PNorm = 55.9054, GNorm = 1.0237, lr_0 = 5.1827e-04
Loss = 4.2744e-01, PNorm = 55.9167, GNorm = 0.9418, lr_0 = 5.1791e-04
Validation mae = 0.115478
Epoch 10
Loss = 4.1897e-01, PNorm = 55.9212, GNorm = 2.6777, lr_0 = 5.1756e-04
Loss = 4.1272e-01, PNorm = 55.9367, GNorm = 1.4457, lr_0 = 5.1720e-04
Loss = 3.9473e-01, PNorm = 55.9449, GNorm = 1.2159, lr_0 = 5.1685e-04
Loss = 4.7759e-01, PNorm = 55.9586, GNorm = 1.1761, lr_0 = 5.1649e-04
Loss = 3.7644e-01, PNorm = 55.9704, GNorm = 1.0438, lr_0 = 5.1614e-04
Loss = 4.1515e-01, PNorm = 55.9824, GNorm = 1.9430, lr_0 = 5.1579e-04
Loss = 3.7550e-01, PNorm = 55.9935, GNorm = 1.3141, lr_0 = 5.1543e-04
Loss = 3.3897e-01, PNorm = 56.0031, GNorm = 1.5422, lr_0 = 5.1508e-04
Loss = 4.1874e-01, PNorm = 56.0126, GNorm = 1.2417, lr_0 = 5.1473e-04
Loss = 4.1106e-01, PNorm = 56.0211, GNorm = 1.2438, lr_0 = 5.1437e-04
Loss = 4.5687e-01, PNorm = 56.0315, GNorm = 1.5111, lr_0 = 5.1402e-04
Loss = 4.4142e-01, PNorm = 56.0420, GNorm = 1.6753, lr_0 = 5.1367e-04
Loss = 3.6688e-01, PNorm = 56.0456, GNorm = 1.0159, lr_0 = 5.1332e-04
Loss = 3.9611e-01, PNorm = 56.0576, GNorm = 1.1797, lr_0 = 5.1297e-04
Loss = 3.5325e-01, PNorm = 56.0731, GNorm = 1.3990, lr_0 = 5.1262e-04
Loss = 4.6287e-01, PNorm = 56.0871, GNorm = 2.2174, lr_0 = 5.1226e-04
Loss = 4.7932e-01, PNorm = 56.0949, GNorm = 1.2943, lr_0 = 5.1191e-04
Loss = 4.1937e-01, PNorm = 56.1059, GNorm = 1.6989, lr_0 = 5.1156e-04
Loss = 4.0496e-01, PNorm = 56.1182, GNorm = 2.0201, lr_0 = 5.1121e-04
Loss = 4.1103e-01, PNorm = 56.1310, GNorm = 1.1897, lr_0 = 5.1086e-04
Loss = 4.5615e-01, PNorm = 56.1390, GNorm = 1.5896, lr_0 = 5.1051e-04
Loss = 3.6863e-01, PNorm = 56.1496, GNorm = 1.0979, lr_0 = 5.1016e-04
Loss = 4.1627e-01, PNorm = 56.1572, GNorm = 2.3536, lr_0 = 5.0981e-04
Loss = 3.8199e-01, PNorm = 56.1602, GNorm = 1.3365, lr_0 = 5.0946e-04
Loss = 4.4386e-01, PNorm = 56.1718, GNorm = 1.8065, lr_0 = 5.0911e-04
Loss = 3.8007e-01, PNorm = 56.1845, GNorm = 2.1200, lr_0 = 5.0877e-04
Loss = 4.0578e-01, PNorm = 56.1933, GNorm = 1.2902, lr_0 = 5.0842e-04
Loss = 3.8523e-01, PNorm = 56.1999, GNorm = 1.0743, lr_0 = 5.0807e-04
Loss = 4.3060e-01, PNorm = 56.2090, GNorm = 1.6009, lr_0 = 5.0772e-04
Loss = 3.6865e-01, PNorm = 56.2119, GNorm = 2.5883, lr_0 = 5.0737e-04
Loss = 3.6717e-01, PNorm = 56.2211, GNorm = 1.0176, lr_0 = 5.0703e-04
Loss = 4.2700e-01, PNorm = 56.2330, GNorm = 1.5256, lr_0 = 5.0668e-04
Loss = 3.4285e-01, PNorm = 56.2443, GNorm = 1.4118, lr_0 = 5.0633e-04
Loss = 5.1245e-01, PNorm = 56.2458, GNorm = 1.3003, lr_0 = 5.0598e-04
Loss = 3.9349e-01, PNorm = 56.2562, GNorm = 1.7233, lr_0 = 5.0564e-04
Loss = 4.2192e-01, PNorm = 56.2660, GNorm = 2.1882, lr_0 = 5.0529e-04
Loss = 4.2422e-01, PNorm = 56.2717, GNorm = 1.4089, lr_0 = 5.0494e-04
Loss = 4.0603e-01, PNorm = 56.2790, GNorm = 1.3700, lr_0 = 5.0460e-04
Loss = 4.6206e-01, PNorm = 56.2906, GNorm = 1.3511, lr_0 = 5.0425e-04
Loss = 4.2578e-01, PNorm = 56.3060, GNorm = 1.2510, lr_0 = 5.0391e-04
Loss = 3.9847e-01, PNorm = 56.3140, GNorm = 1.2668, lr_0 = 5.0356e-04
Loss = 4.2317e-01, PNorm = 56.3216, GNorm = 1.4229, lr_0 = 5.0322e-04
Loss = 4.1104e-01, PNorm = 56.3343, GNorm = 1.3779, lr_0 = 5.0287e-04
Loss = 4.3235e-01, PNorm = 56.3460, GNorm = 1.5327, lr_0 = 5.0253e-04
Loss = 4.4054e-01, PNorm = 56.3543, GNorm = 1.6613, lr_0 = 5.0218e-04
Loss = 4.3747e-01, PNorm = 56.3589, GNorm = 0.8702, lr_0 = 5.0184e-04
Loss = 4.2097e-01, PNorm = 56.3614, GNorm = 0.9658, lr_0 = 5.0150e-04
Loss = 4.2127e-01, PNorm = 56.3735, GNorm = 1.4120, lr_0 = 5.0115e-04
Loss = 4.1370e-01, PNorm = 56.3810, GNorm = 1.2915, lr_0 = 5.0081e-04
Loss = 4.0330e-01, PNorm = 56.3917, GNorm = 2.0619, lr_0 = 5.0047e-04
Loss = 3.8937e-01, PNorm = 56.3999, GNorm = 0.9158, lr_0 = 5.0012e-04
Loss = 3.5572e-01, PNorm = 56.4095, GNorm = 0.7090, lr_0 = 4.9978e-04
Loss = 4.1441e-01, PNorm = 56.4185, GNorm = 1.2225, lr_0 = 4.9944e-04
Loss = 4.1239e-01, PNorm = 56.4226, GNorm = 1.3423, lr_0 = 4.9910e-04
Loss = 4.3445e-01, PNorm = 56.4301, GNorm = 1.3130, lr_0 = 4.9875e-04
Loss = 4.1088e-01, PNorm = 56.4461, GNorm = 1.6319, lr_0 = 4.9841e-04
Loss = 4.4820e-01, PNorm = 56.4529, GNorm = 1.3815, lr_0 = 4.9807e-04
Loss = 4.2393e-01, PNorm = 56.4606, GNorm = 1.7830, lr_0 = 4.9773e-04
Loss = 5.0214e-01, PNorm = 56.4680, GNorm = 1.2367, lr_0 = 4.9739e-04
Loss = 4.3048e-01, PNorm = 56.4770, GNorm = 1.5673, lr_0 = 4.9705e-04
Loss = 4.6469e-01, PNorm = 56.4861, GNorm = 2.1827, lr_0 = 4.9671e-04
Loss = 3.6662e-01, PNorm = 56.5001, GNorm = 1.0680, lr_0 = 4.9637e-04
Loss = 4.0927e-01, PNorm = 56.5063, GNorm = 1.2573, lr_0 = 4.9603e-04
Loss = 4.1954e-01, PNorm = 56.5219, GNorm = 1.0883, lr_0 = 4.9569e-04
Loss = 4.3069e-01, PNorm = 56.5297, GNorm = 1.3395, lr_0 = 4.9535e-04
Loss = 3.9565e-01, PNorm = 56.5362, GNorm = 1.4296, lr_0 = 4.9501e-04
Loss = 3.8163e-01, PNorm = 56.5403, GNorm = 1.2918, lr_0 = 4.9467e-04
Loss = 4.6765e-01, PNorm = 56.5464, GNorm = 1.3274, lr_0 = 4.9433e-04
Loss = 4.1835e-01, PNorm = 56.5586, GNorm = 1.4187, lr_0 = 4.9399e-04
Loss = 4.2769e-01, PNorm = 56.5704, GNorm = 2.5787, lr_0 = 4.9365e-04
Loss = 3.8994e-01, PNorm = 56.5769, GNorm = 2.0973, lr_0 = 4.9332e-04
Loss = 4.1429e-01, PNorm = 56.5856, GNorm = 1.5437, lr_0 = 4.9298e-04
Loss = 4.8047e-01, PNorm = 56.5953, GNorm = 1.4676, lr_0 = 4.9264e-04
Loss = 4.7070e-01, PNorm = 56.6097, GNorm = 1.6971, lr_0 = 4.9230e-04
Loss = 3.9953e-01, PNorm = 56.6168, GNorm = 1.4740, lr_0 = 4.9197e-04
Loss = 4.7292e-01, PNorm = 56.6287, GNorm = 1.4595, lr_0 = 4.9163e-04
Loss = 3.9329e-01, PNorm = 56.6353, GNorm = 1.1160, lr_0 = 4.9129e-04
Loss = 4.2004e-01, PNorm = 56.6472, GNorm = 1.5132, lr_0 = 4.9095e-04
Loss = 4.4535e-01, PNorm = 56.6556, GNorm = 1.8353, lr_0 = 4.9062e-04
Loss = 3.8920e-01, PNorm = 56.6667, GNorm = 1.2242, lr_0 = 4.9028e-04
Loss = 4.2607e-01, PNorm = 56.6760, GNorm = 1.4415, lr_0 = 4.8995e-04
Loss = 3.5360e-01, PNorm = 56.6841, GNorm = 1.1637, lr_0 = 4.8961e-04
Loss = 3.9124e-01, PNorm = 56.6927, GNorm = 1.6540, lr_0 = 4.8928e-04
Loss = 4.6277e-01, PNorm = 56.7002, GNorm = 1.3901, lr_0 = 4.8894e-04
Loss = 4.0828e-01, PNorm = 56.7094, GNorm = 1.1894, lr_0 = 4.8861e-04
Loss = 4.6024e-01, PNorm = 56.7214, GNorm = 1.9111, lr_0 = 4.8827e-04
Loss = 3.9415e-01, PNorm = 56.7265, GNorm = 1.2400, lr_0 = 4.8794e-04
Loss = 4.8046e-01, PNorm = 56.7381, GNorm = 1.3114, lr_0 = 4.8760e-04
Loss = 4.2123e-01, PNorm = 56.7500, GNorm = 1.4074, lr_0 = 4.8727e-04
Loss = 3.7660e-01, PNorm = 56.7594, GNorm = 2.0009, lr_0 = 4.8693e-04
Loss = 4.1987e-01, PNorm = 56.7666, GNorm = 1.5106, lr_0 = 4.8660e-04
Loss = 4.3312e-01, PNorm = 56.7744, GNorm = 1.9687, lr_0 = 4.8627e-04
Loss = 4.5049e-01, PNorm = 56.7832, GNorm = 2.8575, lr_0 = 4.8593e-04
Loss = 4.4719e-01, PNorm = 56.7844, GNorm = 1.2362, lr_0 = 4.8560e-04
Loss = 3.9671e-01, PNorm = 56.7968, GNorm = 1.4106, lr_0 = 4.8527e-04
Loss = 4.1736e-01, PNorm = 56.8090, GNorm = 1.5044, lr_0 = 4.8494e-04
Loss = 3.8778e-01, PNorm = 56.8167, GNorm = 1.2805, lr_0 = 4.8460e-04
Loss = 3.8881e-01, PNorm = 56.8272, GNorm = 1.8971, lr_0 = 4.8427e-04
Loss = 4.4015e-01, PNorm = 56.8339, GNorm = 1.5421, lr_0 = 4.8394e-04
Loss = 4.1823e-01, PNorm = 56.8425, GNorm = 1.3104, lr_0 = 4.8361e-04
Loss = 3.9701e-01, PNorm = 56.8496, GNorm = 1.3158, lr_0 = 4.8328e-04
Loss = 3.9458e-01, PNorm = 56.8561, GNorm = 1.1091, lr_0 = 4.8295e-04
Loss = 4.4792e-01, PNorm = 56.8628, GNorm = 1.3437, lr_0 = 4.8262e-04
Loss = 4.4505e-01, PNorm = 56.8620, GNorm = 1.6103, lr_0 = 4.8228e-04
Loss = 3.9202e-01, PNorm = 56.8727, GNorm = 1.6368, lr_0 = 4.8195e-04
Loss = 4.4864e-01, PNorm = 56.8828, GNorm = 1.1353, lr_0 = 4.8162e-04
Loss = 4.6281e-01, PNorm = 56.8948, GNorm = 1.3882, lr_0 = 4.8129e-04
Loss = 4.2242e-01, PNorm = 56.9017, GNorm = 2.0644, lr_0 = 4.8096e-04
Loss = 4.3111e-01, PNorm = 56.9092, GNorm = 0.9970, lr_0 = 4.8064e-04
Loss = 3.4569e-01, PNorm = 56.9165, GNorm = 1.2210, lr_0 = 4.8031e-04
Loss = 3.9331e-01, PNorm = 56.9237, GNorm = 1.0254, lr_0 = 4.7998e-04
Loss = 3.5961e-01, PNorm = 56.9315, GNorm = 2.0350, lr_0 = 4.7965e-04
Loss = 3.9780e-01, PNorm = 56.9395, GNorm = 1.2484, lr_0 = 4.7932e-04
Loss = 3.8034e-01, PNorm = 56.9452, GNorm = 0.9799, lr_0 = 4.7899e-04
Loss = 4.0628e-01, PNorm = 56.9522, GNorm = 1.1971, lr_0 = 4.7866e-04
Loss = 4.2013e-01, PNorm = 56.9557, GNorm = 1.0098, lr_0 = 4.7833e-04
Loss = 4.5128e-01, PNorm = 56.9632, GNorm = 1.2673, lr_0 = 4.7801e-04
Loss = 4.1447e-01, PNorm = 56.9708, GNorm = 2.0459, lr_0 = 4.7768e-04
Loss = 4.1016e-01, PNorm = 56.9773, GNorm = 1.4316, lr_0 = 4.7735e-04
Loss = 4.0435e-01, PNorm = 56.9783, GNorm = 2.0136, lr_0 = 4.7703e-04
Validation mae = 0.117502
Epoch 11
Loss = 4.2524e-01, PNorm = 56.9867, GNorm = 1.2077, lr_0 = 4.7670e-04
Loss = 4.0677e-01, PNorm = 56.9928, GNorm = 0.9598, lr_0 = 4.7637e-04
Loss = 3.9245e-01, PNorm = 57.0000, GNorm = 1.1587, lr_0 = 4.7605e-04
Loss = 3.9108e-01, PNorm = 57.0093, GNorm = 1.2231, lr_0 = 4.7572e-04
Loss = 3.8413e-01, PNorm = 57.0161, GNorm = 1.2571, lr_0 = 4.7539e-04
Loss = 4.0975e-01, PNorm = 57.0151, GNorm = 1.5613, lr_0 = 4.7507e-04
Loss = 4.2166e-01, PNorm = 57.0188, GNorm = 1.7931, lr_0 = 4.7474e-04
Loss = 4.0552e-01, PNorm = 57.0273, GNorm = 1.3602, lr_0 = 4.7442e-04
Loss = 4.5319e-01, PNorm = 57.0341, GNorm = 2.6411, lr_0 = 4.7409e-04
Loss = 3.9454e-01, PNorm = 57.0421, GNorm = 1.1423, lr_0 = 4.7377e-04
Loss = 4.2015e-01, PNorm = 57.0447, GNorm = 1.3105, lr_0 = 4.7344e-04
Loss = 4.0887e-01, PNorm = 57.0521, GNorm = 0.8423, lr_0 = 4.7312e-04
Loss = 3.7968e-01, PNorm = 57.0587, GNorm = 1.1566, lr_0 = 4.7279e-04
Loss = 3.6043e-01, PNorm = 57.0708, GNorm = 1.8668, lr_0 = 4.7247e-04
Loss = 3.9532e-01, PNorm = 57.0803, GNorm = 1.5808, lr_0 = 4.7215e-04
Loss = 4.2942e-01, PNorm = 57.0883, GNorm = 2.2057, lr_0 = 4.7182e-04
Loss = 4.2802e-01, PNorm = 57.0953, GNorm = 1.2884, lr_0 = 4.7150e-04
Loss = 4.6182e-01, PNorm = 57.1084, GNorm = 1.9297, lr_0 = 4.7118e-04
Loss = 4.2483e-01, PNorm = 57.1136, GNorm = 1.9520, lr_0 = 4.7085e-04
Loss = 4.3662e-01, PNorm = 57.1193, GNorm = 1.4368, lr_0 = 4.7053e-04
Loss = 3.9849e-01, PNorm = 57.1266, GNorm = 1.2971, lr_0 = 4.7021e-04
Loss = 4.6996e-01, PNorm = 57.1331, GNorm = 1.2990, lr_0 = 4.6989e-04
Loss = 4.0189e-01, PNorm = 57.1438, GNorm = 0.7849, lr_0 = 4.6957e-04
Loss = 3.5707e-01, PNorm = 57.1488, GNorm = 1.2189, lr_0 = 4.6924e-04
Loss = 3.8063e-01, PNorm = 57.1520, GNorm = 1.0921, lr_0 = 4.6892e-04
Loss = 4.1806e-01, PNorm = 57.1569, GNorm = 0.9483, lr_0 = 4.6860e-04
Loss = 4.1214e-01, PNorm = 57.1680, GNorm = 1.2604, lr_0 = 4.6828e-04
Loss = 4.0643e-01, PNorm = 57.1733, GNorm = 1.7426, lr_0 = 4.6796e-04
Loss = 3.6480e-01, PNorm = 57.1811, GNorm = 1.0566, lr_0 = 4.6764e-04
Loss = 3.8599e-01, PNorm = 57.1888, GNorm = 1.1550, lr_0 = 4.6732e-04
Loss = 3.5148e-01, PNorm = 57.1996, GNorm = 1.6662, lr_0 = 4.6700e-04
Loss = 4.3242e-01, PNorm = 57.2108, GNorm = 1.6507, lr_0 = 4.6668e-04
Loss = 4.2495e-01, PNorm = 57.2204, GNorm = 1.4314, lr_0 = 4.6636e-04
Loss = 4.2502e-01, PNorm = 57.2295, GNorm = 1.2447, lr_0 = 4.6604e-04
Loss = 4.3530e-01, PNorm = 57.2379, GNorm = 1.1336, lr_0 = 4.6572e-04
Loss = 3.4236e-01, PNorm = 57.2474, GNorm = 1.4967, lr_0 = 4.6540e-04
Loss = 3.7786e-01, PNorm = 57.2515, GNorm = 1.7303, lr_0 = 4.6508e-04
Loss = 4.0578e-01, PNorm = 57.2610, GNorm = 1.5106, lr_0 = 4.6476e-04
Loss = 4.0963e-01, PNorm = 57.2670, GNorm = 1.6100, lr_0 = 4.6445e-04
Loss = 3.9443e-01, PNorm = 57.2785, GNorm = 2.2268, lr_0 = 4.6413e-04
Loss = 4.1934e-01, PNorm = 57.2837, GNorm = 1.2541, lr_0 = 4.6381e-04
Loss = 4.3166e-01, PNorm = 57.2921, GNorm = 1.4004, lr_0 = 4.6349e-04
Loss = 3.8463e-01, PNorm = 57.2979, GNorm = 1.0705, lr_0 = 4.6317e-04
Loss = 4.5998e-01, PNorm = 57.3069, GNorm = 1.3805, lr_0 = 4.6286e-04
Loss = 3.8380e-01, PNorm = 57.3153, GNorm = 0.9737, lr_0 = 4.6254e-04
Loss = 3.9754e-01, PNorm = 57.3252, GNorm = 1.2986, lr_0 = 4.6222e-04
Loss = 3.8491e-01, PNorm = 57.3382, GNorm = 1.6698, lr_0 = 4.6191e-04
Loss = 4.0694e-01, PNorm = 57.3432, GNorm = 1.6778, lr_0 = 4.6159e-04
Loss = 3.6442e-01, PNorm = 57.3514, GNorm = 2.0383, lr_0 = 4.6127e-04
Loss = 4.0620e-01, PNorm = 57.3593, GNorm = 1.2156, lr_0 = 4.6096e-04
Loss = 4.4585e-01, PNorm = 57.3660, GNorm = 1.1760, lr_0 = 4.6064e-04
Loss = 4.0691e-01, PNorm = 57.3757, GNorm = 1.5510, lr_0 = 4.6033e-04
Loss = 4.5424e-01, PNorm = 57.3859, GNorm = 2.2086, lr_0 = 4.6001e-04
Loss = 3.7663e-01, PNorm = 57.3940, GNorm = 1.3063, lr_0 = 4.5970e-04
Loss = 3.3871e-01, PNorm = 57.4034, GNorm = 1.3130, lr_0 = 4.5938e-04
Loss = 3.8357e-01, PNorm = 57.4080, GNorm = 1.5252, lr_0 = 4.5907e-04
Loss = 3.6766e-01, PNorm = 57.4115, GNorm = 1.3970, lr_0 = 4.5875e-04
Loss = 4.3404e-01, PNorm = 57.4174, GNorm = 1.7866, lr_0 = 4.5844e-04
Loss = 3.8555e-01, PNorm = 57.4254, GNorm = 1.7050, lr_0 = 4.5812e-04
Loss = 3.6632e-01, PNorm = 57.4292, GNorm = 1.5000, lr_0 = 4.5781e-04
Loss = 4.3566e-01, PNorm = 57.4367, GNorm = 1.1048, lr_0 = 4.5750e-04
Loss = 3.4203e-01, PNorm = 57.4425, GNorm = 0.9762, lr_0 = 4.5718e-04
Loss = 4.5426e-01, PNorm = 57.4471, GNorm = 1.4675, lr_0 = 4.5687e-04
Loss = 3.7358e-01, PNorm = 57.4564, GNorm = 1.2248, lr_0 = 4.5656e-04
Loss = 4.7551e-01, PNorm = 57.4647, GNorm = 1.0720, lr_0 = 4.5624e-04
Loss = 4.2691e-01, PNorm = 57.4795, GNorm = 2.7896, lr_0 = 4.5593e-04
Loss = 4.4094e-01, PNorm = 57.4851, GNorm = 1.3695, lr_0 = 4.5562e-04
Loss = 4.1903e-01, PNorm = 57.5037, GNorm = 1.6185, lr_0 = 4.5531e-04
Loss = 4.0749e-01, PNorm = 57.5163, GNorm = 1.3525, lr_0 = 4.5499e-04
Loss = 4.3143e-01, PNorm = 57.5231, GNorm = 1.3659, lr_0 = 4.5468e-04
Loss = 4.1801e-01, PNorm = 57.5246, GNorm = 1.5980, lr_0 = 4.5437e-04
Loss = 3.5797e-01, PNorm = 57.5287, GNorm = 0.9834, lr_0 = 4.5406e-04
Loss = 4.3221e-01, PNorm = 57.5355, GNorm = 1.8074, lr_0 = 4.5375e-04
Loss = 4.3960e-01, PNorm = 57.5435, GNorm = 1.5095, lr_0 = 4.5344e-04
Loss = 4.1135e-01, PNorm = 57.5479, GNorm = 1.1951, lr_0 = 4.5313e-04
Loss = 3.9482e-01, PNorm = 57.5563, GNorm = 1.5176, lr_0 = 4.5282e-04
Loss = 3.8883e-01, PNorm = 57.5667, GNorm = 1.5478, lr_0 = 4.5251e-04
Loss = 4.1054e-01, PNorm = 57.5811, GNorm = 1.3081, lr_0 = 4.5220e-04
Loss = 3.6830e-01, PNorm = 57.5925, GNorm = 1.1355, lr_0 = 4.5189e-04
Loss = 3.4172e-01, PNorm = 57.6009, GNorm = 1.3424, lr_0 = 4.5158e-04
Loss = 4.1114e-01, PNorm = 57.6119, GNorm = 2.5178, lr_0 = 4.5127e-04
Loss = 4.0602e-01, PNorm = 57.6169, GNorm = 2.0459, lr_0 = 4.5096e-04
Loss = 4.0125e-01, PNorm = 57.6262, GNorm = 1.6605, lr_0 = 4.5065e-04
Loss = 3.6645e-01, PNorm = 57.6297, GNorm = 1.8884, lr_0 = 4.5034e-04
Loss = 3.9251e-01, PNorm = 57.6387, GNorm = 1.4320, lr_0 = 4.5003e-04
Loss = 4.0755e-01, PNorm = 57.6456, GNorm = 1.9253, lr_0 = 4.4972e-04
Loss = 4.0097e-01, PNorm = 57.6530, GNorm = 1.0641, lr_0 = 4.4942e-04
Loss = 4.2457e-01, PNorm = 57.6535, GNorm = 1.0497, lr_0 = 4.4911e-04
Loss = 4.2754e-01, PNorm = 57.6639, GNorm = 1.1897, lr_0 = 4.4880e-04
Loss = 4.4744e-01, PNorm = 57.6708, GNorm = 1.5308, lr_0 = 4.4849e-04
Loss = 5.1040e-01, PNorm = 57.6858, GNorm = 1.9675, lr_0 = 4.4819e-04
Loss = 3.6988e-01, PNorm = 57.6946, GNorm = 1.3731, lr_0 = 4.4788e-04
Loss = 3.9808e-01, PNorm = 57.6977, GNorm = 1.3140, lr_0 = 4.4757e-04
Loss = 4.3348e-01, PNorm = 57.7013, GNorm = 1.5686, lr_0 = 4.4727e-04
Loss = 4.1353e-01, PNorm = 57.7043, GNorm = 2.1688, lr_0 = 4.4696e-04
Loss = 3.8335e-01, PNorm = 57.7080, GNorm = 1.6264, lr_0 = 4.4665e-04
Loss = 4.3504e-01, PNorm = 57.7151, GNorm = 1.4926, lr_0 = 4.4635e-04
Loss = 3.5733e-01, PNorm = 57.7230, GNorm = 1.0894, lr_0 = 4.4604e-04
Loss = 4.3707e-01, PNorm = 57.7323, GNorm = 1.2790, lr_0 = 4.4574e-04
Loss = 4.4597e-01, PNorm = 57.7363, GNorm = 1.7807, lr_0 = 4.4543e-04
Loss = 4.7037e-01, PNorm = 57.7474, GNorm = 1.3018, lr_0 = 4.4513e-04
Loss = 3.9699e-01, PNorm = 57.7539, GNorm = 1.0291, lr_0 = 4.4482e-04
Loss = 4.5017e-01, PNorm = 57.7653, GNorm = 1.5351, lr_0 = 4.4452e-04
Loss = 3.4498e-01, PNorm = 57.7714, GNorm = 1.1068, lr_0 = 4.4421e-04
Loss = 4.7349e-01, PNorm = 57.7814, GNorm = 1.6523, lr_0 = 4.4391e-04
Loss = 4.0159e-01, PNorm = 57.7888, GNorm = 1.5281, lr_0 = 4.4360e-04
Loss = 4.3146e-01, PNorm = 57.7942, GNorm = 1.1414, lr_0 = 4.4330e-04
Loss = 3.7963e-01, PNorm = 57.8028, GNorm = 1.5529, lr_0 = 4.4299e-04
Loss = 3.9195e-01, PNorm = 57.8111, GNorm = 1.4709, lr_0 = 4.4269e-04
Loss = 4.8587e-01, PNorm = 57.8158, GNorm = 1.4993, lr_0 = 4.4239e-04
Loss = 3.9803e-01, PNorm = 57.8258, GNorm = 1.0721, lr_0 = 4.4209e-04
Loss = 4.9392e-01, PNorm = 57.8306, GNorm = 1.2450, lr_0 = 4.4178e-04
Loss = 3.7055e-01, PNorm = 57.8394, GNorm = 1.5917, lr_0 = 4.4148e-04
Loss = 4.0822e-01, PNorm = 57.8442, GNorm = 1.4523, lr_0 = 4.4118e-04
Loss = 4.4934e-01, PNorm = 57.8571, GNorm = 1.7481, lr_0 = 4.4088e-04
Loss = 3.7411e-01, PNorm = 57.8631, GNorm = 1.7865, lr_0 = 4.4057e-04
Loss = 3.8038e-01, PNorm = 57.8747, GNorm = 2.1479, lr_0 = 4.4027e-04
Loss = 4.4685e-01, PNorm = 57.8843, GNorm = 1.3782, lr_0 = 4.3997e-04
Loss = 3.6856e-01, PNorm = 57.8969, GNorm = 1.9789, lr_0 = 4.3967e-04
Loss = 4.2458e-01, PNorm = 57.9050, GNorm = 1.1929, lr_0 = 4.3937e-04
Validation mae = 0.114273
Epoch 12
Loss = 4.3153e-01, PNorm = 57.9137, GNorm = 1.4613, lr_0 = 4.3907e-04
Loss = 4.2468e-01, PNorm = 57.9225, GNorm = 1.5476, lr_0 = 4.3877e-04
Loss = 3.8805e-01, PNorm = 57.9240, GNorm = 1.2106, lr_0 = 4.3846e-04
Loss = 3.7735e-01, PNorm = 57.9278, GNorm = 1.1205, lr_0 = 4.3816e-04
Loss = 3.6260e-01, PNorm = 57.9324, GNorm = 0.8182, lr_0 = 4.3786e-04
Loss = 4.2717e-01, PNorm = 57.9407, GNorm = 1.1318, lr_0 = 4.3756e-04
Loss = 3.6831e-01, PNorm = 57.9498, GNorm = 1.4946, lr_0 = 4.3726e-04
Loss = 4.4100e-01, PNorm = 57.9573, GNorm = 1.1242, lr_0 = 4.3696e-04
Loss = 3.8729e-01, PNorm = 57.9688, GNorm = 1.5238, lr_0 = 4.3667e-04
Loss = 4.5089e-01, PNorm = 57.9759, GNorm = 1.1519, lr_0 = 4.3637e-04
Loss = 4.0093e-01, PNorm = 57.9886, GNorm = 1.1085, lr_0 = 4.3607e-04
Loss = 4.1559e-01, PNorm = 57.9961, GNorm = 1.2060, lr_0 = 4.3577e-04
Loss = 3.6131e-01, PNorm = 58.0116, GNorm = 1.3893, lr_0 = 4.3547e-04
Loss = 4.2417e-01, PNorm = 58.0182, GNorm = 1.7468, lr_0 = 4.3517e-04
Loss = 4.3433e-01, PNorm = 58.0294, GNorm = 1.3665, lr_0 = 4.3487e-04
Loss = 4.3276e-01, PNorm = 58.0343, GNorm = 1.9550, lr_0 = 4.3458e-04
Loss = 3.6276e-01, PNorm = 58.0403, GNorm = 1.2815, lr_0 = 4.3428e-04
Loss = 3.9986e-01, PNorm = 58.0431, GNorm = 1.9409, lr_0 = 4.3398e-04
Loss = 4.5054e-01, PNorm = 58.0455, GNorm = 1.6596, lr_0 = 4.3368e-04
Loss = 3.5657e-01, PNorm = 58.0551, GNorm = 1.0995, lr_0 = 4.3339e-04
Loss = 4.3593e-01, PNorm = 58.0591, GNorm = 1.1649, lr_0 = 4.3309e-04
Loss = 3.6545e-01, PNorm = 58.0697, GNorm = 1.5036, lr_0 = 4.3279e-04
Loss = 4.0163e-01, PNorm = 58.0809, GNorm = 1.4491, lr_0 = 4.3250e-04
Loss = 4.5342e-01, PNorm = 58.0873, GNorm = 1.8439, lr_0 = 4.3220e-04
Loss = 3.7156e-01, PNorm = 58.0940, GNorm = 1.3610, lr_0 = 4.3190e-04
Loss = 3.5557e-01, PNorm = 58.1055, GNorm = 1.6286, lr_0 = 4.3161e-04
Loss = 3.9031e-01, PNorm = 58.1116, GNorm = 1.5034, lr_0 = 4.3131e-04
Loss = 4.0517e-01, PNorm = 58.1172, GNorm = 1.3115, lr_0 = 4.3102e-04
Loss = 4.1911e-01, PNorm = 58.1204, GNorm = 1.2123, lr_0 = 4.3072e-04
Loss = 3.7951e-01, PNorm = 58.1243, GNorm = 1.2389, lr_0 = 4.3043e-04
Loss = 4.2513e-01, PNorm = 58.1331, GNorm = 1.4742, lr_0 = 4.3013e-04
Loss = 3.8259e-01, PNorm = 58.1373, GNorm = 1.1188, lr_0 = 4.2984e-04
Loss = 4.5610e-01, PNorm = 58.1438, GNorm = 2.5212, lr_0 = 4.2954e-04
Loss = 3.9846e-01, PNorm = 58.1512, GNorm = 1.0177, lr_0 = 4.2925e-04
Loss = 3.2923e-01, PNorm = 58.1540, GNorm = 1.0785, lr_0 = 4.2895e-04
Loss = 3.3163e-01, PNorm = 58.1577, GNorm = 1.2245, lr_0 = 4.2866e-04
Loss = 3.9174e-01, PNorm = 58.1637, GNorm = 1.2353, lr_0 = 4.2837e-04
Loss = 3.4236e-01, PNorm = 58.1739, GNorm = 1.3239, lr_0 = 4.2807e-04
Loss = 3.8214e-01, PNorm = 58.1790, GNorm = 1.3550, lr_0 = 4.2778e-04
Loss = 4.2281e-01, PNorm = 58.1875, GNorm = 1.8888, lr_0 = 4.2749e-04
Loss = 4.1872e-01, PNorm = 58.1927, GNorm = 1.4397, lr_0 = 4.2719e-04
Loss = 3.7558e-01, PNorm = 58.2004, GNorm = 1.0812, lr_0 = 4.2690e-04
Loss = 3.8920e-01, PNorm = 58.2085, GNorm = 1.4657, lr_0 = 4.2661e-04
Loss = 3.8849e-01, PNorm = 58.2162, GNorm = 1.2777, lr_0 = 4.2632e-04
Loss = 3.7230e-01, PNorm = 58.2200, GNorm = 1.6428, lr_0 = 4.2602e-04
Loss = 4.1462e-01, PNorm = 58.2292, GNorm = 2.6290, lr_0 = 4.2573e-04
Loss = 4.2629e-01, PNorm = 58.2334, GNorm = 1.1359, lr_0 = 4.2544e-04
Loss = 3.9246e-01, PNorm = 58.2428, GNorm = 1.4646, lr_0 = 4.2515e-04
Loss = 4.0837e-01, PNorm = 58.2525, GNorm = 1.4974, lr_0 = 4.2486e-04
Loss = 4.2384e-01, PNorm = 58.2568, GNorm = 1.3531, lr_0 = 4.2457e-04
Loss = 4.1924e-01, PNorm = 58.2625, GNorm = 1.3107, lr_0 = 4.2428e-04
Loss = 3.4731e-01, PNorm = 58.2690, GNorm = 1.2020, lr_0 = 4.2399e-04
Loss = 3.8388e-01, PNorm = 58.2808, GNorm = 1.5791, lr_0 = 4.2370e-04
Loss = 3.6989e-01, PNorm = 58.2828, GNorm = 1.1324, lr_0 = 4.2340e-04
Loss = 4.4141e-01, PNorm = 58.2861, GNorm = 1.6994, lr_0 = 4.2311e-04
Loss = 4.4002e-01, PNorm = 58.2924, GNorm = 1.3856, lr_0 = 4.2283e-04
Loss = 4.0712e-01, PNorm = 58.2984, GNorm = 1.1270, lr_0 = 4.2254e-04
Loss = 4.1831e-01, PNorm = 58.3047, GNorm = 1.4110, lr_0 = 4.2225e-04
Loss = 4.2388e-01, PNorm = 58.3151, GNorm = 1.2187, lr_0 = 4.2196e-04
Loss = 4.2863e-01, PNorm = 58.3223, GNorm = 2.7484, lr_0 = 4.2167e-04
Loss = 4.2861e-01, PNorm = 58.3293, GNorm = 1.2356, lr_0 = 4.2138e-04
Loss = 3.8003e-01, PNorm = 58.3378, GNorm = 2.0401, lr_0 = 4.2109e-04
Loss = 4.2147e-01, PNorm = 58.3454, GNorm = 1.3077, lr_0 = 4.2080e-04
Loss = 4.0082e-01, PNorm = 58.3518, GNorm = 1.3036, lr_0 = 4.2051e-04
Loss = 4.6020e-01, PNorm = 58.3601, GNorm = 1.7803, lr_0 = 4.2023e-04
Loss = 4.3565e-01, PNorm = 58.3680, GNorm = 1.1077, lr_0 = 4.1994e-04
Loss = 3.8329e-01, PNorm = 58.3756, GNorm = 1.4520, lr_0 = 4.1965e-04
Loss = 3.2885e-01, PNorm = 58.3843, GNorm = 0.8739, lr_0 = 4.1936e-04
Loss = 3.6132e-01, PNorm = 58.3859, GNorm = 0.9809, lr_0 = 4.1907e-04
Loss = 4.0876e-01, PNorm = 58.3925, GNorm = 1.4145, lr_0 = 4.1879e-04
Loss = 4.0395e-01, PNorm = 58.3983, GNorm = 1.3562, lr_0 = 4.1850e-04
Loss = 3.6237e-01, PNorm = 58.4029, GNorm = 0.9205, lr_0 = 4.1821e-04
Loss = 3.6368e-01, PNorm = 58.4081, GNorm = 1.2964, lr_0 = 4.1793e-04
Loss = 4.1389e-01, PNorm = 58.4152, GNorm = 1.2547, lr_0 = 4.1764e-04
Loss = 3.9703e-01, PNorm = 58.4205, GNorm = 1.3338, lr_0 = 4.1736e-04
Loss = 4.1631e-01, PNorm = 58.4266, GNorm = 0.7476, lr_0 = 4.1707e-04
Loss = 4.2851e-01, PNorm = 58.4332, GNorm = 1.2520, lr_0 = 4.1678e-04
Loss = 4.0501e-01, PNorm = 58.4360, GNorm = 1.0299, lr_0 = 4.1650e-04
Loss = 3.8662e-01, PNorm = 58.4427, GNorm = 1.6326, lr_0 = 4.1621e-04
Loss = 3.8484e-01, PNorm = 58.4467, GNorm = 2.0620, lr_0 = 4.1593e-04
Loss = 3.9107e-01, PNorm = 58.4538, GNorm = 1.5782, lr_0 = 4.1564e-04
Loss = 3.8572e-01, PNorm = 58.4557, GNorm = 1.1681, lr_0 = 4.1536e-04
Loss = 4.2208e-01, PNorm = 58.4608, GNorm = 0.9862, lr_0 = 4.1507e-04
Loss = 4.1176e-01, PNorm = 58.4680, GNorm = 1.0624, lr_0 = 4.1479e-04
Loss = 3.6440e-01, PNorm = 58.4779, GNorm = 1.3380, lr_0 = 4.1450e-04
Loss = 3.8221e-01, PNorm = 58.4842, GNorm = 1.3466, lr_0 = 4.1422e-04
Loss = 3.7972e-01, PNorm = 58.4849, GNorm = 1.2557, lr_0 = 4.1394e-04
Loss = 3.6154e-01, PNorm = 58.4898, GNorm = 1.3013, lr_0 = 4.1365e-04
Loss = 3.6925e-01, PNorm = 58.4936, GNorm = 1.7087, lr_0 = 4.1337e-04
Loss = 4.4312e-01, PNorm = 58.5018, GNorm = 2.1610, lr_0 = 4.1309e-04
Loss = 3.9496e-01, PNorm = 58.5167, GNorm = 1.6726, lr_0 = 4.1280e-04
Loss = 3.9983e-01, PNorm = 58.5261, GNorm = 1.4717, lr_0 = 4.1252e-04
Loss = 4.3688e-01, PNorm = 58.5375, GNorm = 1.6436, lr_0 = 4.1224e-04
Loss = 3.9604e-01, PNorm = 58.5439, GNorm = 2.1712, lr_0 = 4.1196e-04
Loss = 3.9609e-01, PNorm = 58.5493, GNorm = 1.2708, lr_0 = 4.1167e-04
Loss = 5.2703e-01, PNorm = 58.5551, GNorm = 2.1666, lr_0 = 4.1139e-04
Loss = 3.8381e-01, PNorm = 58.5620, GNorm = 1.4989, lr_0 = 4.1111e-04
Loss = 4.6402e-01, PNorm = 58.5679, GNorm = 1.3148, lr_0 = 4.1083e-04
Loss = 3.9423e-01, PNorm = 58.5793, GNorm = 1.6232, lr_0 = 4.1055e-04
Loss = 3.9540e-01, PNorm = 58.5854, GNorm = 1.3324, lr_0 = 4.1027e-04
Loss = 4.0113e-01, PNorm = 58.5927, GNorm = 1.7834, lr_0 = 4.0998e-04
Loss = 3.8980e-01, PNorm = 58.5980, GNorm = 1.1883, lr_0 = 4.0970e-04
Loss = 3.9294e-01, PNorm = 58.6020, GNorm = 1.3157, lr_0 = 4.0942e-04
Loss = 4.2261e-01, PNorm = 58.6109, GNorm = 2.0890, lr_0 = 4.0914e-04
Loss = 4.0467e-01, PNorm = 58.6171, GNorm = 1.2837, lr_0 = 4.0886e-04
Loss = 4.1511e-01, PNorm = 58.6221, GNorm = 1.5587, lr_0 = 4.0858e-04
Loss = 3.7069e-01, PNorm = 58.6312, GNorm = 0.9288, lr_0 = 4.0830e-04
Loss = 4.2839e-01, PNorm = 58.6377, GNorm = 1.5377, lr_0 = 4.0802e-04
Loss = 4.3578e-01, PNorm = 58.6464, GNorm = 1.3275, lr_0 = 4.0774e-04
Loss = 3.9820e-01, PNorm = 58.6513, GNorm = 1.4864, lr_0 = 4.0746e-04
Loss = 3.7744e-01, PNorm = 58.6564, GNorm = 0.9823, lr_0 = 4.0718e-04
Loss = 4.1339e-01, PNorm = 58.6620, GNorm = 1.5895, lr_0 = 4.0691e-04
Loss = 4.1392e-01, PNorm = 58.6674, GNorm = 1.9577, lr_0 = 4.0663e-04
Loss = 4.0471e-01, PNorm = 58.6742, GNorm = 1.1202, lr_0 = 4.0635e-04
Loss = 3.6254e-01, PNorm = 58.6852, GNorm = 0.9940, lr_0 = 4.0607e-04
Loss = 3.9571e-01, PNorm = 58.6929, GNorm = 1.5074, lr_0 = 4.0579e-04
Loss = 4.0701e-01, PNorm = 58.6974, GNorm = 1.2367, lr_0 = 4.0551e-04
Loss = 3.6825e-01, PNorm = 58.7053, GNorm = 1.8910, lr_0 = 4.0524e-04
Loss = 4.4553e-01, PNorm = 58.7084, GNorm = 1.6217, lr_0 = 4.0496e-04
Loss = 3.6654e-01, PNorm = 58.7203, GNorm = 2.3111, lr_0 = 4.0468e-04
Validation mae = 0.114714
Epoch 13
Loss = 3.6319e-01, PNorm = 58.7251, GNorm = 1.1975, lr_0 = 4.0440e-04
Loss = 3.8168e-01, PNorm = 58.7318, GNorm = 1.0757, lr_0 = 4.0413e-04
Loss = 3.6408e-01, PNorm = 58.7368, GNorm = 1.1510, lr_0 = 4.0385e-04
Loss = 4.7516e-01, PNorm = 58.7374, GNorm = 1.5648, lr_0 = 4.0357e-04
Loss = 3.5944e-01, PNorm = 58.7495, GNorm = 1.2723, lr_0 = 4.0330e-04
Loss = 3.9762e-01, PNorm = 58.7599, GNorm = 1.0113, lr_0 = 4.0302e-04
Loss = 4.0554e-01, PNorm = 58.7706, GNorm = 1.0730, lr_0 = 4.0274e-04
Loss = 3.7942e-01, PNorm = 58.7766, GNorm = 1.1259, lr_0 = 4.0247e-04
Loss = 3.9805e-01, PNorm = 58.7807, GNorm = 1.4830, lr_0 = 4.0219e-04
Loss = 3.9901e-01, PNorm = 58.7895, GNorm = 1.0739, lr_0 = 4.0192e-04
Loss = 3.4951e-01, PNorm = 58.7973, GNorm = 1.0542, lr_0 = 4.0164e-04
Loss = 4.8915e-01, PNorm = 58.7983, GNorm = 1.9552, lr_0 = 4.0137e-04
Loss = 3.8523e-01, PNorm = 58.8018, GNorm = 0.9708, lr_0 = 4.0109e-04
Loss = 3.3451e-01, PNorm = 58.8080, GNorm = 1.0314, lr_0 = 4.0082e-04
Loss = 3.8962e-01, PNorm = 58.8141, GNorm = 1.5463, lr_0 = 4.0054e-04
Loss = 3.3457e-01, PNorm = 58.8216, GNorm = 1.1815, lr_0 = 4.0027e-04
Loss = 3.7008e-01, PNorm = 58.8270, GNorm = 1.5926, lr_0 = 3.9999e-04
Loss = 3.9144e-01, PNorm = 58.8282, GNorm = 1.3030, lr_0 = 3.9972e-04
Loss = 3.9195e-01, PNorm = 58.8339, GNorm = 1.7496, lr_0 = 3.9945e-04
Loss = 3.8479e-01, PNorm = 58.8358, GNorm = 1.4315, lr_0 = 3.9917e-04
Loss = 4.5917e-01, PNorm = 58.8401, GNorm = 1.5580, lr_0 = 3.9890e-04
Loss = 4.3076e-01, PNorm = 58.8445, GNorm = 2.8955, lr_0 = 3.9863e-04
Loss = 4.2825e-01, PNorm = 58.8495, GNorm = 1.3219, lr_0 = 3.9835e-04
Loss = 4.1149e-01, PNorm = 58.8541, GNorm = 1.4061, lr_0 = 3.9808e-04
Loss = 4.4347e-01, PNorm = 58.8599, GNorm = 0.9687, lr_0 = 3.9781e-04
Loss = 3.9973e-01, PNorm = 58.8683, GNorm = 0.9817, lr_0 = 3.9753e-04
Loss = 4.1175e-01, PNorm = 58.8756, GNorm = 1.1659, lr_0 = 3.9726e-04
Loss = 4.2531e-01, PNorm = 58.8847, GNorm = 2.8215, lr_0 = 3.9699e-04
Loss = 4.2579e-01, PNorm = 58.8898, GNorm = 1.4542, lr_0 = 3.9672e-04
Loss = 3.7593e-01, PNorm = 58.8988, GNorm = 1.3487, lr_0 = 3.9645e-04
Loss = 3.7481e-01, PNorm = 58.9039, GNorm = 1.8802, lr_0 = 3.9617e-04
Loss = 4.1249e-01, PNorm = 58.9147, GNorm = 1.7503, lr_0 = 3.9590e-04
Loss = 3.8673e-01, PNorm = 58.9217, GNorm = 1.5873, lr_0 = 3.9563e-04
Loss = 3.5937e-01, PNorm = 58.9283, GNorm = 1.2832, lr_0 = 3.9536e-04
Loss = 4.1236e-01, PNorm = 58.9389, GNorm = 1.7037, lr_0 = 3.9509e-04
Loss = 4.0447e-01, PNorm = 58.9473, GNorm = 1.8712, lr_0 = 3.9482e-04
Loss = 3.9125e-01, PNorm = 58.9582, GNorm = 1.6748, lr_0 = 3.9455e-04
Loss = 3.9160e-01, PNorm = 58.9585, GNorm = 1.3360, lr_0 = 3.9428e-04
Loss = 3.6828e-01, PNorm = 58.9677, GNorm = 1.1543, lr_0 = 3.9401e-04
Loss = 4.0865e-01, PNorm = 58.9700, GNorm = 1.3553, lr_0 = 3.9374e-04
Loss = 3.6791e-01, PNorm = 58.9818, GNorm = 1.6615, lr_0 = 3.9347e-04
Loss = 3.7809e-01, PNorm = 58.9858, GNorm = 1.1211, lr_0 = 3.9320e-04
Loss = 3.5369e-01, PNorm = 58.9926, GNorm = 1.0753, lr_0 = 3.9293e-04
Loss = 3.9961e-01, PNorm = 58.9971, GNorm = 1.6352, lr_0 = 3.9266e-04
Loss = 3.8260e-01, PNorm = 59.0007, GNorm = 1.6306, lr_0 = 3.9239e-04
Loss = 3.8498e-01, PNorm = 59.0093, GNorm = 1.2670, lr_0 = 3.9212e-04
Loss = 4.0559e-01, PNorm = 59.0132, GNorm = 1.1604, lr_0 = 3.9185e-04
Loss = 4.3076e-01, PNorm = 59.0201, GNorm = 1.5121, lr_0 = 3.9159e-04
Loss = 3.3914e-01, PNorm = 59.0301, GNorm = 1.6143, lr_0 = 3.9132e-04
Loss = 3.6822e-01, PNorm = 59.0326, GNorm = 1.2389, lr_0 = 3.9105e-04
Loss = 4.3884e-01, PNorm = 59.0391, GNorm = 1.2656, lr_0 = 3.9078e-04
Loss = 4.0052e-01, PNorm = 59.0465, GNorm = 1.4468, lr_0 = 3.9051e-04
Loss = 3.5654e-01, PNorm = 59.0549, GNorm = 1.5753, lr_0 = 3.9025e-04
Loss = 3.6586e-01, PNorm = 59.0656, GNorm = 2.2103, lr_0 = 3.8998e-04
Loss = 3.5613e-01, PNorm = 59.0748, GNorm = 1.2206, lr_0 = 3.8971e-04
Loss = 3.7076e-01, PNorm = 59.0831, GNorm = 1.3866, lr_0 = 3.8945e-04
Loss = 3.5726e-01, PNorm = 59.0888, GNorm = 0.9858, lr_0 = 3.8918e-04
Loss = 3.7751e-01, PNorm = 59.0984, GNorm = 1.4220, lr_0 = 3.8891e-04
Loss = 3.8182e-01, PNorm = 59.1023, GNorm = 2.0483, lr_0 = 3.8865e-04
Loss = 4.1905e-01, PNorm = 59.1097, GNorm = 1.2883, lr_0 = 3.8838e-04
Loss = 4.0292e-01, PNorm = 59.1155, GNorm = 1.3915, lr_0 = 3.8811e-04
Loss = 4.5103e-01, PNorm = 59.1227, GNorm = 1.6504, lr_0 = 3.8785e-04
Loss = 4.3125e-01, PNorm = 59.1267, GNorm = 1.4210, lr_0 = 3.8758e-04
Loss = 3.8700e-01, PNorm = 59.1249, GNorm = 1.7047, lr_0 = 3.8732e-04
Loss = 4.0197e-01, PNorm = 59.1325, GNorm = 1.6685, lr_0 = 3.8705e-04
Loss = 3.7956e-01, PNorm = 59.1360, GNorm = 1.1019, lr_0 = 3.8679e-04
Loss = 4.0541e-01, PNorm = 59.1415, GNorm = 1.2494, lr_0 = 3.8652e-04
Loss = 4.6884e-01, PNorm = 59.1514, GNorm = 1.4337, lr_0 = 3.8626e-04
Loss = 4.1957e-01, PNorm = 59.1574, GNorm = 1.2910, lr_0 = 3.8599e-04
Loss = 4.2013e-01, PNorm = 59.1607, GNorm = 0.9465, lr_0 = 3.8573e-04
Loss = 3.7438e-01, PNorm = 59.1654, GNorm = 1.0537, lr_0 = 3.8546e-04
Loss = 4.2329e-01, PNorm = 59.1708, GNorm = 1.0372, lr_0 = 3.8520e-04
Loss = 4.0109e-01, PNorm = 59.1782, GNorm = 1.5045, lr_0 = 3.8493e-04
Loss = 3.9795e-01, PNorm = 59.1827, GNorm = 1.3682, lr_0 = 3.8467e-04
Loss = 3.6277e-01, PNorm = 59.1887, GNorm = 1.5973, lr_0 = 3.8441e-04
Loss = 3.7187e-01, PNorm = 59.1889, GNorm = 1.1554, lr_0 = 3.8414e-04
Loss = 3.5527e-01, PNorm = 59.1937, GNorm = 1.0747, lr_0 = 3.8388e-04
Loss = 3.8213e-01, PNorm = 59.2010, GNorm = 1.4925, lr_0 = 3.8362e-04
Loss = 3.9283e-01, PNorm = 59.2105, GNorm = 1.4094, lr_0 = 3.8336e-04
Loss = 3.9802e-01, PNorm = 59.2141, GNorm = 1.1724, lr_0 = 3.8309e-04
Loss = 3.3604e-01, PNorm = 59.2149, GNorm = 1.4660, lr_0 = 3.8283e-04
Loss = 3.4568e-01, PNorm = 59.2189, GNorm = 1.3008, lr_0 = 3.8257e-04
Loss = 4.1664e-01, PNorm = 59.2207, GNorm = 1.1392, lr_0 = 3.8231e-04
Loss = 3.7454e-01, PNorm = 59.2249, GNorm = 1.4492, lr_0 = 3.8204e-04
Loss = 3.8909e-01, PNorm = 59.2319, GNorm = 1.1252, lr_0 = 3.8178e-04
Loss = 3.7881e-01, PNorm = 59.2379, GNorm = 1.5572, lr_0 = 3.8152e-04
Loss = 4.5390e-01, PNorm = 59.2465, GNorm = 1.0862, lr_0 = 3.8126e-04
Loss = 4.0187e-01, PNorm = 59.2520, GNorm = 1.7858, lr_0 = 3.8100e-04
Loss = 4.0943e-01, PNorm = 59.2604, GNorm = 1.1550, lr_0 = 3.8074e-04
Loss = 4.3860e-01, PNorm = 59.2664, GNorm = 1.7759, lr_0 = 3.8048e-04
Loss = 3.8972e-01, PNorm = 59.2748, GNorm = 1.1961, lr_0 = 3.8022e-04
Loss = 3.5776e-01, PNorm = 59.2804, GNorm = 1.0762, lr_0 = 3.7995e-04
Loss = 3.7225e-01, PNorm = 59.2864, GNorm = 1.3312, lr_0 = 3.7969e-04
Loss = 4.0456e-01, PNorm = 59.2899, GNorm = 1.1533, lr_0 = 3.7943e-04
Loss = 4.1614e-01, PNorm = 59.2956, GNorm = 1.2650, lr_0 = 3.7917e-04
Loss = 3.8505e-01, PNorm = 59.3000, GNorm = 1.3283, lr_0 = 3.7891e-04
Loss = 3.9323e-01, PNorm = 59.3064, GNorm = 1.3365, lr_0 = 3.7866e-04
Loss = 3.6674e-01, PNorm = 59.3085, GNorm = 1.4721, lr_0 = 3.7840e-04
Loss = 4.1564e-01, PNorm = 59.3110, GNorm = 1.4804, lr_0 = 3.7814e-04
Loss = 3.7237e-01, PNorm = 59.3189, GNorm = 1.6740, lr_0 = 3.7788e-04
Loss = 4.1803e-01, PNorm = 59.3217, GNorm = 1.4416, lr_0 = 3.7762e-04
Loss = 3.4555e-01, PNorm = 59.3341, GNorm = 1.0840, lr_0 = 3.7736e-04
Loss = 4.0227e-01, PNorm = 59.3374, GNorm = 1.5337, lr_0 = 3.7710e-04
Loss = 4.2533e-01, PNorm = 59.3376, GNorm = 1.3200, lr_0 = 3.7684e-04
Loss = 3.1701e-01, PNorm = 59.3492, GNorm = 1.4301, lr_0 = 3.7659e-04
Loss = 4.5188e-01, PNorm = 59.3499, GNorm = 1.3580, lr_0 = 3.7633e-04
Loss = 4.6751e-01, PNorm = 59.3643, GNorm = 1.5109, lr_0 = 3.7607e-04
Loss = 3.5068e-01, PNorm = 59.3753, GNorm = 1.3603, lr_0 = 3.7581e-04
Loss = 4.2223e-01, PNorm = 59.3829, GNorm = 1.3979, lr_0 = 3.7555e-04
Loss = 3.4716e-01, PNorm = 59.3915, GNorm = 1.5786, lr_0 = 3.7530e-04
Loss = 3.6708e-01, PNorm = 59.3950, GNorm = 1.3494, lr_0 = 3.7504e-04
Loss = 3.7859e-01, PNorm = 59.4015, GNorm = 1.5523, lr_0 = 3.7478e-04
Loss = 4.4766e-01, PNorm = 59.4085, GNorm = 1.3703, lr_0 = 3.7453e-04
Loss = 4.1488e-01, PNorm = 59.4149, GNorm = 1.8309, lr_0 = 3.7427e-04
Loss = 3.7140e-01, PNorm = 59.4222, GNorm = 1.2443, lr_0 = 3.7401e-04
Loss = 4.7231e-01, PNorm = 59.4314, GNorm = 1.8507, lr_0 = 3.7376e-04
Loss = 3.4683e-01, PNorm = 59.4398, GNorm = 1.5643, lr_0 = 3.7350e-04
Loss = 4.0592e-01, PNorm = 59.4412, GNorm = 1.3381, lr_0 = 3.7325e-04
Loss = 4.1410e-01, PNorm = 59.4451, GNorm = 1.1130, lr_0 = 3.7299e-04
Loss = 4.0045e-01, PNorm = 59.4513, GNorm = 1.1290, lr_0 = 3.7273e-04
Validation mae = 0.113528
Epoch 14
Loss = 3.8618e-01, PNorm = 59.4562, GNorm = 1.1988, lr_0 = 3.7248e-04
Loss = 3.7292e-01, PNorm = 59.4575, GNorm = 1.3012, lr_0 = 3.7222e-04
Loss = 4.4923e-01, PNorm = 59.4639, GNorm = 1.1872, lr_0 = 3.7197e-04
Loss = 3.7146e-01, PNorm = 59.4733, GNorm = 1.0606, lr_0 = 3.7171e-04
Loss = 4.0697e-01, PNorm = 59.4736, GNorm = 1.4340, lr_0 = 3.7146e-04
Loss = 3.5708e-01, PNorm = 59.4743, GNorm = 1.4370, lr_0 = 3.7120e-04
Loss = 4.0074e-01, PNorm = 59.4819, GNorm = 1.3085, lr_0 = 3.7095e-04
Loss = 3.6068e-01, PNorm = 59.4890, GNorm = 1.6505, lr_0 = 3.7070e-04
Loss = 3.7854e-01, PNorm = 59.4932, GNorm = 1.6334, lr_0 = 3.7044e-04
Loss = 4.1142e-01, PNorm = 59.5009, GNorm = 1.5170, lr_0 = 3.7019e-04
Loss = 3.6995e-01, PNorm = 59.5082, GNorm = 1.1913, lr_0 = 3.6993e-04
Loss = 3.4709e-01, PNorm = 59.5145, GNorm = 1.9394, lr_0 = 3.6968e-04
Loss = 4.5479e-01, PNorm = 59.5232, GNorm = 1.0394, lr_0 = 3.6943e-04
Loss = 4.2172e-01, PNorm = 59.5252, GNorm = 2.5607, lr_0 = 3.6917e-04
Loss = 4.0059e-01, PNorm = 59.5325, GNorm = 1.0137, lr_0 = 3.6892e-04
Loss = 3.9253e-01, PNorm = 59.5393, GNorm = 2.3840, lr_0 = 3.6867e-04
Loss = 4.7073e-01, PNorm = 59.5458, GNorm = 2.2771, lr_0 = 3.6842e-04
Loss = 3.7310e-01, PNorm = 59.5529, GNorm = 1.4153, lr_0 = 3.6816e-04
Loss = 4.1567e-01, PNorm = 59.5522, GNorm = 1.2611, lr_0 = 3.6791e-04
Loss = 3.2751e-01, PNorm = 59.5613, GNorm = 1.0005, lr_0 = 3.6766e-04
Loss = 4.2337e-01, PNorm = 59.5675, GNorm = 1.4035, lr_0 = 3.6741e-04
Loss = 3.5403e-01, PNorm = 59.5765, GNorm = 1.0119, lr_0 = 3.6716e-04
Loss = 4.0100e-01, PNorm = 59.5778, GNorm = 1.0192, lr_0 = 3.6690e-04
Loss = 3.5105e-01, PNorm = 59.5861, GNorm = 1.5987, lr_0 = 3.6665e-04
Loss = 3.7701e-01, PNorm = 59.5923, GNorm = 1.3172, lr_0 = 3.6640e-04
Loss = 3.4446e-01, PNorm = 59.5983, GNorm = 0.7316, lr_0 = 3.6615e-04
Loss = 4.2507e-01, PNorm = 59.6042, GNorm = 1.2605, lr_0 = 3.6590e-04
Loss = 3.8738e-01, PNorm = 59.6100, GNorm = 1.3755, lr_0 = 3.6565e-04
Loss = 3.8272e-01, PNorm = 59.6147, GNorm = 1.3607, lr_0 = 3.6540e-04
Loss = 4.2498e-01, PNorm = 59.6177, GNorm = 1.7973, lr_0 = 3.6515e-04
Loss = 3.3849e-01, PNorm = 59.6212, GNorm = 1.2874, lr_0 = 3.6490e-04
Loss = 3.8842e-01, PNorm = 59.6228, GNorm = 1.4265, lr_0 = 3.6465e-04
Loss = 3.8081e-01, PNorm = 59.6320, GNorm = 1.3565, lr_0 = 3.6440e-04
Loss = 4.2351e-01, PNorm = 59.6359, GNorm = 1.9353, lr_0 = 3.6415e-04
Loss = 3.5778e-01, PNorm = 59.6408, GNorm = 1.1957, lr_0 = 3.6390e-04
Loss = 4.1994e-01, PNorm = 59.6459, GNorm = 1.7688, lr_0 = 3.6365e-04
Loss = 3.4961e-01, PNorm = 59.6570, GNorm = 1.3566, lr_0 = 3.6340e-04
Loss = 4.4054e-01, PNorm = 59.6645, GNorm = 1.6259, lr_0 = 3.6315e-04
Loss = 3.9111e-01, PNorm = 59.6676, GNorm = 1.1142, lr_0 = 3.6290e-04
Loss = 4.0685e-01, PNorm = 59.6745, GNorm = 1.2232, lr_0 = 3.6266e-04
Loss = 3.9522e-01, PNorm = 59.6801, GNorm = 1.2890, lr_0 = 3.6241e-04
Loss = 3.4701e-01, PNorm = 59.6830, GNorm = 1.1569, lr_0 = 3.6216e-04
Loss = 4.1037e-01, PNorm = 59.6830, GNorm = 1.4771, lr_0 = 3.6191e-04
Loss = 3.9512e-01, PNorm = 59.6863, GNorm = 1.2334, lr_0 = 3.6166e-04
Loss = 4.3471e-01, PNorm = 59.6906, GNorm = 1.6180, lr_0 = 3.6141e-04
Loss = 3.7078e-01, PNorm = 59.6951, GNorm = 1.1174, lr_0 = 3.6117e-04
Loss = 3.7389e-01, PNorm = 59.6993, GNorm = 1.8538, lr_0 = 3.6092e-04
Loss = 3.9156e-01, PNorm = 59.7014, GNorm = 1.4324, lr_0 = 3.6067e-04
Loss = 3.5661e-01, PNorm = 59.7082, GNorm = 1.7509, lr_0 = 3.6043e-04
Loss = 4.4044e-01, PNorm = 59.7101, GNorm = 1.7832, lr_0 = 3.6018e-04
Loss = 3.7584e-01, PNorm = 59.7156, GNorm = 1.4570, lr_0 = 3.5993e-04
Loss = 3.9525e-01, PNorm = 59.7205, GNorm = 2.1002, lr_0 = 3.5969e-04
Loss = 4.4170e-01, PNorm = 59.7300, GNorm = 1.2689, lr_0 = 3.5944e-04
Loss = 4.2373e-01, PNorm = 59.7367, GNorm = 1.1842, lr_0 = 3.5919e-04
Loss = 3.3730e-01, PNorm = 59.7401, GNorm = 0.9999, lr_0 = 3.5895e-04
Loss = 3.7843e-01, PNorm = 59.7438, GNorm = 1.0552, lr_0 = 3.5870e-04
Loss = 3.9771e-01, PNorm = 59.7534, GNorm = 1.3019, lr_0 = 3.5845e-04
Loss = 4.2209e-01, PNorm = 59.7583, GNorm = 1.6068, lr_0 = 3.5821e-04
Loss = 3.7937e-01, PNorm = 59.7614, GNorm = 1.2788, lr_0 = 3.5796e-04
Loss = 3.6073e-01, PNorm = 59.7662, GNorm = 1.2260, lr_0 = 3.5772e-04
Loss = 3.5615e-01, PNorm = 59.7737, GNorm = 1.9764, lr_0 = 3.5747e-04
Loss = 4.0788e-01, PNorm = 59.7811, GNorm = 1.4369, lr_0 = 3.5723e-04
Loss = 3.7855e-01, PNorm = 59.7855, GNorm = 1.3283, lr_0 = 3.5698e-04
Loss = 4.0972e-01, PNorm = 59.7928, GNorm = 2.4278, lr_0 = 3.5674e-04
Loss = 3.5686e-01, PNorm = 59.7989, GNorm = 1.5516, lr_0 = 3.5650e-04
Loss = 4.3966e-01, PNorm = 59.8070, GNorm = 1.1681, lr_0 = 3.5625e-04
Loss = 3.6721e-01, PNorm = 59.8157, GNorm = 1.2912, lr_0 = 3.5601e-04
Loss = 4.2919e-01, PNorm = 59.8169, GNorm = 1.5411, lr_0 = 3.5576e-04
Loss = 3.6465e-01, PNorm = 59.8213, GNorm = 1.2190, lr_0 = 3.5552e-04
Loss = 4.1389e-01, PNorm = 59.8234, GNorm = 1.6102, lr_0 = 3.5528e-04
Loss = 3.8404e-01, PNorm = 59.8328, GNorm = 1.1441, lr_0 = 3.5503e-04
Loss = 4.2833e-01, PNorm = 59.8427, GNorm = 1.3775, lr_0 = 3.5479e-04
Loss = 3.8409e-01, PNorm = 59.8472, GNorm = 1.1918, lr_0 = 3.5455e-04
Loss = 3.6337e-01, PNorm = 59.8508, GNorm = 1.6179, lr_0 = 3.5430e-04
Loss = 3.5584e-01, PNorm = 59.8538, GNorm = 1.5013, lr_0 = 3.5406e-04
Loss = 3.5618e-01, PNorm = 59.8603, GNorm = 1.4538, lr_0 = 3.5382e-04
Loss = 3.2037e-01, PNorm = 59.8628, GNorm = 1.1999, lr_0 = 3.5358e-04
Loss = 3.8691e-01, PNorm = 59.8657, GNorm = 1.6091, lr_0 = 3.5333e-04
Loss = 3.2903e-01, PNorm = 59.8695, GNorm = 1.0248, lr_0 = 3.5309e-04
Loss = 3.7908e-01, PNorm = 59.8750, GNorm = 2.3576, lr_0 = 3.5285e-04
Loss = 3.8034e-01, PNorm = 59.8756, GNorm = 1.4108, lr_0 = 3.5261e-04
Loss = 3.9364e-01, PNorm = 59.8811, GNorm = 1.4400, lr_0 = 3.5237e-04
Loss = 4.3146e-01, PNorm = 59.8839, GNorm = 1.0615, lr_0 = 3.5212e-04
Loss = 4.0341e-01, PNorm = 59.8920, GNorm = 1.2787, lr_0 = 3.5188e-04
Loss = 3.7455e-01, PNorm = 59.8992, GNorm = 1.2803, lr_0 = 3.5164e-04
Loss = 3.8118e-01, PNorm = 59.9049, GNorm = 1.2918, lr_0 = 3.5140e-04
Loss = 3.7873e-01, PNorm = 59.9158, GNorm = 1.5195, lr_0 = 3.5116e-04
Loss = 3.5971e-01, PNorm = 59.9228, GNorm = 1.4071, lr_0 = 3.5092e-04
Loss = 3.8458e-01, PNorm = 59.9255, GNorm = 1.6014, lr_0 = 3.5068e-04
Loss = 4.1984e-01, PNorm = 59.9299, GNorm = 1.1799, lr_0 = 3.5044e-04
Loss = 3.8223e-01, PNorm = 59.9317, GNorm = 1.5470, lr_0 = 3.5020e-04
Loss = 4.2093e-01, PNorm = 59.9356, GNorm = 1.9340, lr_0 = 3.4996e-04
Loss = 3.7851e-01, PNorm = 59.9424, GNorm = 1.3744, lr_0 = 3.4972e-04
Loss = 4.0842e-01, PNorm = 59.9448, GNorm = 1.4509, lr_0 = 3.4948e-04
Loss = 3.6921e-01, PNorm = 59.9517, GNorm = 1.1090, lr_0 = 3.4924e-04
Loss = 3.7033e-01, PNorm = 59.9561, GNorm = 1.4544, lr_0 = 3.4900e-04
Loss = 3.8139e-01, PNorm = 59.9602, GNorm = 1.0003, lr_0 = 3.4876e-04
Loss = 4.0534e-01, PNorm = 59.9611, GNorm = 1.6747, lr_0 = 3.4852e-04
Loss = 3.6029e-01, PNorm = 59.9613, GNorm = 1.0225, lr_0 = 3.4828e-04
Loss = 4.3928e-01, PNorm = 59.9655, GNorm = 1.5425, lr_0 = 3.4805e-04
Loss = 4.9550e-01, PNorm = 59.9735, GNorm = 1.9809, lr_0 = 3.4781e-04
Loss = 3.8241e-01, PNorm = 59.9815, GNorm = 1.2192, lr_0 = 3.4757e-04
Loss = 3.8096e-01, PNorm = 59.9852, GNorm = 1.4798, lr_0 = 3.4733e-04
Loss = 3.9729e-01, PNorm = 59.9862, GNorm = 1.4465, lr_0 = 3.4709e-04
Loss = 3.9536e-01, PNorm = 59.9918, GNorm = 1.5881, lr_0 = 3.4686e-04
Loss = 3.5180e-01, PNorm = 59.9978, GNorm = 1.3198, lr_0 = 3.4662e-04
Loss = 4.3831e-01, PNorm = 60.0001, GNorm = 1.7362, lr_0 = 3.4638e-04
Loss = 4.2659e-01, PNorm = 60.0054, GNorm = 1.3924, lr_0 = 3.4614e-04
Loss = 3.4745e-01, PNorm = 60.0087, GNorm = 1.2321, lr_0 = 3.4591e-04
Loss = 3.7082e-01, PNorm = 60.0115, GNorm = 1.5648, lr_0 = 3.4567e-04
Loss = 3.8625e-01, PNorm = 60.0152, GNorm = 1.3572, lr_0 = 3.4543e-04
Loss = 4.2940e-01, PNorm = 60.0216, GNorm = 1.4710, lr_0 = 3.4520e-04
Loss = 4.2784e-01, PNorm = 60.0272, GNorm = 1.8844, lr_0 = 3.4496e-04
Loss = 3.4990e-01, PNorm = 60.0298, GNorm = 1.5884, lr_0 = 3.4472e-04
Loss = 3.5850e-01, PNorm = 60.0302, GNorm = 1.3116, lr_0 = 3.4449e-04
Loss = 3.7152e-01, PNorm = 60.0334, GNorm = 1.1985, lr_0 = 3.4425e-04
Loss = 3.9797e-01, PNorm = 60.0346, GNorm = 1.2219, lr_0 = 3.4402e-04
Loss = 3.9560e-01, PNorm = 60.0407, GNorm = 1.3599, lr_0 = 3.4378e-04
Loss = 3.5549e-01, PNorm = 60.0436, GNorm = 1.5174, lr_0 = 3.4354e-04
Loss = 4.5158e-01, PNorm = 60.0508, GNorm = 1.2370, lr_0 = 3.4331e-04
Validation mae = 0.112659
Epoch 15
Loss = 4.2098e-01, PNorm = 60.0566, GNorm = 1.7376, lr_0 = 3.4307e-04
Loss = 3.7336e-01, PNorm = 60.0629, GNorm = 1.5315, lr_0 = 3.4284e-04
Loss = 3.7290e-01, PNorm = 60.0686, GNorm = 1.3489, lr_0 = 3.4260e-04
Loss = 3.6470e-01, PNorm = 60.0724, GNorm = 1.3524, lr_0 = 3.4237e-04
Loss = 3.7754e-01, PNorm = 60.0780, GNorm = 1.1715, lr_0 = 3.4213e-04
Loss = 3.4839e-01, PNorm = 60.0781, GNorm = 1.2250, lr_0 = 3.4190e-04
Loss = 3.6417e-01, PNorm = 60.0840, GNorm = 1.6697, lr_0 = 3.4167e-04
Loss = 3.7675e-01, PNorm = 60.0886, GNorm = 1.3330, lr_0 = 3.4143e-04
Loss = 3.9105e-01, PNorm = 60.0936, GNorm = 1.3495, lr_0 = 3.4120e-04
Loss = 3.6924e-01, PNorm = 60.0973, GNorm = 1.7780, lr_0 = 3.4096e-04
Loss = 4.1327e-01, PNorm = 60.1036, GNorm = 2.1149, lr_0 = 3.4073e-04
Loss = 3.9204e-01, PNorm = 60.1088, GNorm = 1.2589, lr_0 = 3.4050e-04
Loss = 3.6148e-01, PNorm = 60.1139, GNorm = 1.1149, lr_0 = 3.4026e-04
Loss = 4.5115e-01, PNorm = 60.1189, GNorm = 1.4595, lr_0 = 3.4003e-04
Loss = 4.0577e-01, PNorm = 60.1284, GNorm = 1.3620, lr_0 = 3.3980e-04
Loss = 3.7434e-01, PNorm = 60.1322, GNorm = 1.6227, lr_0 = 3.3956e-04
Loss = 3.8038e-01, PNorm = 60.1358, GNorm = 1.4672, lr_0 = 3.3933e-04
Loss = 4.0710e-01, PNorm = 60.1425, GNorm = 2.0023, lr_0 = 3.3910e-04
Loss = 3.6553e-01, PNorm = 60.1485, GNorm = 1.1693, lr_0 = 3.3887e-04
Loss = 3.9633e-01, PNorm = 60.1562, GNorm = 1.3403, lr_0 = 3.3864e-04
Loss = 4.1101e-01, PNorm = 60.1621, GNorm = 1.3866, lr_0 = 3.3840e-04
Loss = 3.6314e-01, PNorm = 60.1664, GNorm = 1.3708, lr_0 = 3.3817e-04
Loss = 3.7955e-01, PNorm = 60.1697, GNorm = 1.4942, lr_0 = 3.3794e-04
Loss = 3.7734e-01, PNorm = 60.1713, GNorm = 1.5916, lr_0 = 3.3771e-04
Loss = 3.5557e-01, PNorm = 60.1749, GNorm = 1.1531, lr_0 = 3.3748e-04
Loss = 3.5068e-01, PNorm = 60.1782, GNorm = 1.0278, lr_0 = 3.3725e-04
Loss = 3.6125e-01, PNorm = 60.1824, GNorm = 1.6378, lr_0 = 3.3701e-04
Loss = 3.5811e-01, PNorm = 60.1894, GNorm = 1.3984, lr_0 = 3.3678e-04
Loss = 4.0118e-01, PNorm = 60.1959, GNorm = 1.3365, lr_0 = 3.3655e-04
Loss = 3.5867e-01, PNorm = 60.2029, GNorm = 1.7076, lr_0 = 3.3632e-04
Loss = 3.6889e-01, PNorm = 60.2064, GNorm = 1.4441, lr_0 = 3.3609e-04
Loss = 3.6508e-01, PNorm = 60.2107, GNorm = 1.4228, lr_0 = 3.3586e-04
Loss = 3.8093e-01, PNorm = 60.2156, GNorm = 1.4506, lr_0 = 3.3563e-04
Loss = 3.9248e-01, PNorm = 60.2266, GNorm = 1.1773, lr_0 = 3.3540e-04
Loss = 4.8533e-01, PNorm = 60.2342, GNorm = 1.2113, lr_0 = 3.3517e-04
Loss = 4.0434e-01, PNorm = 60.2381, GNorm = 1.5665, lr_0 = 3.3494e-04
Loss = 4.1418e-01, PNorm = 60.2419, GNorm = 1.6070, lr_0 = 3.3471e-04
Loss = 3.9759e-01, PNorm = 60.2470, GNorm = 1.8945, lr_0 = 3.3448e-04
Loss = 3.6893e-01, PNorm = 60.2553, GNorm = 2.1560, lr_0 = 3.3425e-04
Loss = 3.9112e-01, PNorm = 60.2581, GNorm = 1.7789, lr_0 = 3.3403e-04
Loss = 3.3735e-01, PNorm = 60.2674, GNorm = 1.3431, lr_0 = 3.3380e-04
Loss = 4.4523e-01, PNorm = 60.2715, GNorm = 1.6765, lr_0 = 3.3357e-04
Loss = 3.3282e-01, PNorm = 60.2771, GNorm = 1.2007, lr_0 = 3.3334e-04
Loss = 4.1263e-01, PNorm = 60.2841, GNorm = 1.6966, lr_0 = 3.3311e-04
Loss = 3.6824e-01, PNorm = 60.2913, GNorm = 1.3181, lr_0 = 3.3288e-04
Loss = 3.5485e-01, PNorm = 60.2957, GNorm = 1.2373, lr_0 = 3.3265e-04
Loss = 3.7301e-01, PNorm = 60.2995, GNorm = 1.2062, lr_0 = 3.3243e-04
Loss = 3.8970e-01, PNorm = 60.3026, GNorm = 1.3615, lr_0 = 3.3220e-04
Loss = 3.7746e-01, PNorm = 60.3065, GNorm = 1.5034, lr_0 = 3.3197e-04
Loss = 4.2247e-01, PNorm = 60.3107, GNorm = 1.4517, lr_0 = 3.3174e-04
Loss = 4.0644e-01, PNorm = 60.3204, GNorm = 2.1715, lr_0 = 3.3152e-04
Loss = 4.3550e-01, PNorm = 60.3239, GNorm = 1.5062, lr_0 = 3.3129e-04
Loss = 3.3294e-01, PNorm = 60.3315, GNorm = 1.6915, lr_0 = 3.3106e-04
Loss = 4.0286e-01, PNorm = 60.3338, GNorm = 1.1046, lr_0 = 3.3084e-04
Loss = 4.2871e-01, PNorm = 60.3391, GNorm = 1.9029, lr_0 = 3.3061e-04
Loss = 3.4204e-01, PNorm = 60.3504, GNorm = 1.4519, lr_0 = 3.3038e-04
Loss = 4.3736e-01, PNorm = 60.3583, GNorm = 1.3567, lr_0 = 3.3016e-04
Loss = 4.6294e-01, PNorm = 60.3683, GNorm = 1.7154, lr_0 = 3.2993e-04
Loss = 3.7294e-01, PNorm = 60.3717, GNorm = 1.8045, lr_0 = 3.2970e-04
Loss = 3.7381e-01, PNorm = 60.3795, GNorm = 1.2950, lr_0 = 3.2948e-04
Loss = 3.6297e-01, PNorm = 60.3848, GNorm = 1.2143, lr_0 = 3.2925e-04
Loss = 4.2948e-01, PNorm = 60.3934, GNorm = 1.6336, lr_0 = 3.2903e-04
Loss = 4.1261e-01, PNorm = 60.4025, GNorm = 1.5224, lr_0 = 3.2880e-04
Loss = 4.4406e-01, PNorm = 60.4069, GNorm = 1.1713, lr_0 = 3.2858e-04
Loss = 3.6612e-01, PNorm = 60.4099, GNorm = 1.2459, lr_0 = 3.2835e-04
Loss = 3.9537e-01, PNorm = 60.4142, GNorm = 1.0850, lr_0 = 3.2813e-04
Loss = 3.6448e-01, PNorm = 60.4209, GNorm = 1.1564, lr_0 = 3.2790e-04
Loss = 3.8580e-01, PNorm = 60.4248, GNorm = 1.6562, lr_0 = 3.2768e-04
Loss = 3.6218e-01, PNorm = 60.4280, GNorm = 1.2928, lr_0 = 3.2745e-04
Loss = 3.4672e-01, PNorm = 60.4314, GNorm = 1.5893, lr_0 = 3.2723e-04
Loss = 3.5272e-01, PNorm = 60.4331, GNorm = 1.2623, lr_0 = 3.2700e-04
Loss = 4.2060e-01, PNorm = 60.4399, GNorm = 1.2582, lr_0 = 3.2678e-04
Loss = 3.3321e-01, PNorm = 60.4449, GNorm = 1.1494, lr_0 = 3.2656e-04
Loss = 3.7691e-01, PNorm = 60.4484, GNorm = 1.2846, lr_0 = 3.2633e-04
Loss = 3.3361e-01, PNorm = 60.4530, GNorm = 1.9645, lr_0 = 3.2611e-04
Loss = 4.1706e-01, PNorm = 60.4544, GNorm = 1.6051, lr_0 = 3.2589e-04
Loss = 3.6296e-01, PNorm = 60.4584, GNorm = 1.3412, lr_0 = 3.2566e-04
Loss = 3.3584e-01, PNorm = 60.4652, GNorm = 1.1956, lr_0 = 3.2544e-04
Loss = 3.6858e-01, PNorm = 60.4705, GNorm = 1.1600, lr_0 = 3.2522e-04
Loss = 3.9157e-01, PNorm = 60.4756, GNorm = 1.2436, lr_0 = 3.2499e-04
Loss = 3.8876e-01, PNorm = 60.4802, GNorm = 1.4801, lr_0 = 3.2477e-04
Loss = 4.3564e-01, PNorm = 60.4846, GNorm = 1.9560, lr_0 = 3.2455e-04
Loss = 3.7471e-01, PNorm = 60.4855, GNorm = 1.2417, lr_0 = 3.2433e-04
Loss = 3.8204e-01, PNorm = 60.4920, GNorm = 1.0045, lr_0 = 3.2410e-04
Loss = 3.7858e-01, PNorm = 60.4959, GNorm = 1.4697, lr_0 = 3.2388e-04
Loss = 3.9375e-01, PNorm = 60.4992, GNorm = 1.4739, lr_0 = 3.2366e-04
Loss = 4.1169e-01, PNorm = 60.5008, GNorm = 1.0921, lr_0 = 3.2344e-04
Loss = 4.0390e-01, PNorm = 60.5065, GNorm = 1.4126, lr_0 = 3.2322e-04
Loss = 3.6602e-01, PNorm = 60.5119, GNorm = 1.4218, lr_0 = 3.2300e-04
Loss = 3.5086e-01, PNorm = 60.5186, GNorm = 1.0355, lr_0 = 3.2277e-04
Loss = 3.3741e-01, PNorm = 60.5226, GNorm = 0.8851, lr_0 = 3.2255e-04
Loss = 3.8669e-01, PNorm = 60.5277, GNorm = 1.3925, lr_0 = 3.2233e-04
Loss = 4.3835e-01, PNorm = 60.5314, GNorm = 1.7719, lr_0 = 3.2211e-04
Loss = 3.6377e-01, PNorm = 60.5339, GNorm = 1.1391, lr_0 = 3.2189e-04
Loss = 3.7261e-01, PNorm = 60.5346, GNorm = 1.6283, lr_0 = 3.2167e-04
Loss = 3.5365e-01, PNorm = 60.5380, GNorm = 1.1856, lr_0 = 3.2145e-04
Loss = 3.4455e-01, PNorm = 60.5415, GNorm = 1.4666, lr_0 = 3.2123e-04
Loss = 3.5415e-01, PNorm = 60.5478, GNorm = 1.5197, lr_0 = 3.2101e-04
Loss = 3.3209e-01, PNorm = 60.5513, GNorm = 1.6515, lr_0 = 3.2079e-04
Loss = 3.6414e-01, PNorm = 60.5587, GNorm = 1.1303, lr_0 = 3.2057e-04
Loss = 3.9077e-01, PNorm = 60.5619, GNorm = 2.0266, lr_0 = 3.2035e-04
Loss = 4.3407e-01, PNorm = 60.5678, GNorm = 1.1951, lr_0 = 3.2013e-04
Loss = 3.6981e-01, PNorm = 60.5718, GNorm = 1.6550, lr_0 = 3.1991e-04
Loss = 4.4717e-01, PNorm = 60.5718, GNorm = 1.3937, lr_0 = 3.1969e-04
Loss = 3.6193e-01, PNorm = 60.5768, GNorm = 1.8728, lr_0 = 3.1947e-04
Loss = 3.9961e-01, PNorm = 60.5838, GNorm = 1.0713, lr_0 = 3.1925e-04
Loss = 3.5816e-01, PNorm = 60.5902, GNorm = 1.7320, lr_0 = 3.1904e-04
Loss = 3.8419e-01, PNorm = 60.5911, GNorm = 1.0324, lr_0 = 3.1882e-04
Loss = 3.5229e-01, PNorm = 60.5942, GNorm = 1.1276, lr_0 = 3.1860e-04
Loss = 3.9542e-01, PNorm = 60.5968, GNorm = 1.4164, lr_0 = 3.1838e-04
Loss = 4.0038e-01, PNorm = 60.6011, GNorm = 1.2391, lr_0 = 3.1816e-04
Loss = 3.8604e-01, PNorm = 60.6034, GNorm = 1.1876, lr_0 = 3.1794e-04
Loss = 3.4552e-01, PNorm = 60.6088, GNorm = 1.4162, lr_0 = 3.1773e-04
Loss = 3.6517e-01, PNorm = 60.6114, GNorm = 1.4122, lr_0 = 3.1751e-04
Loss = 3.9553e-01, PNorm = 60.6146, GNorm = 1.7079, lr_0 = 3.1729e-04
Loss = 4.2280e-01, PNorm = 60.6182, GNorm = 1.0381, lr_0 = 3.1707e-04
Loss = 3.9090e-01, PNorm = 60.6232, GNorm = 1.2196, lr_0 = 3.1686e-04
Loss = 3.7615e-01, PNorm = 60.6279, GNorm = 1.3945, lr_0 = 3.1664e-04
Loss = 4.1831e-01, PNorm = 60.6323, GNorm = 1.4591, lr_0 = 3.1642e-04
Loss = 3.3030e-01, PNorm = 60.6354, GNorm = 1.2045, lr_0 = 3.1621e-04
Validation mae = 0.111970
Epoch 16
Loss = 3.7535e-01, PNorm = 60.6401, GNorm = 1.3209, lr_0 = 3.1599e-04
Loss = 3.3977e-01, PNorm = 60.6435, GNorm = 1.1345, lr_0 = 3.1577e-04
Loss = 3.3384e-01, PNorm = 60.6483, GNorm = 1.3793, lr_0 = 3.1556e-04
Loss = 3.9146e-01, PNorm = 60.6528, GNorm = 1.0658, lr_0 = 3.1534e-04
Loss = 3.7351e-01, PNorm = 60.6582, GNorm = 1.5126, lr_0 = 3.1512e-04
Loss = 3.9157e-01, PNorm = 60.6626, GNorm = 1.1603, lr_0 = 3.1491e-04
Loss = 3.5771e-01, PNorm = 60.6666, GNorm = 1.1443, lr_0 = 3.1469e-04
Loss = 3.8123e-01, PNorm = 60.6713, GNorm = 1.2147, lr_0 = 3.1448e-04
Loss = 3.4500e-01, PNorm = 60.6733, GNorm = 1.3124, lr_0 = 3.1426e-04
Loss = 4.1043e-01, PNorm = 60.6762, GNorm = 1.4635, lr_0 = 3.1405e-04
Loss = 3.4417e-01, PNorm = 60.6804, GNorm = 1.3496, lr_0 = 3.1383e-04
Loss = 4.0673e-01, PNorm = 60.6865, GNorm = 2.2798, lr_0 = 3.1362e-04
Loss = 3.4542e-01, PNorm = 60.6929, GNorm = 2.3724, lr_0 = 3.1340e-04
Loss = 3.2933e-01, PNorm = 60.6991, GNorm = 1.0523, lr_0 = 3.1319e-04
Loss = 3.5695e-01, PNorm = 60.7026, GNorm = 1.1840, lr_0 = 3.1297e-04
Loss = 3.3568e-01, PNorm = 60.7058, GNorm = 1.4439, lr_0 = 3.1276e-04
Loss = 3.6704e-01, PNorm = 60.7102, GNorm = 1.9867, lr_0 = 3.1254e-04
Loss = 3.9047e-01, PNorm = 60.7122, GNorm = 1.7669, lr_0 = 3.1233e-04
Loss = 3.8286e-01, PNorm = 60.7195, GNorm = 1.4925, lr_0 = 3.1212e-04
Loss = 3.6389e-01, PNorm = 60.7262, GNorm = 1.3387, lr_0 = 3.1190e-04
Loss = 3.5693e-01, PNorm = 60.7289, GNorm = 1.4300, lr_0 = 3.1169e-04
Loss = 3.6739e-01, PNorm = 60.7353, GNorm = 1.7430, lr_0 = 3.1147e-04
Loss = 3.5598e-01, PNorm = 60.7390, GNorm = 1.0649, lr_0 = 3.1126e-04
Loss = 4.5122e-01, PNorm = 60.7439, GNorm = 1.7358, lr_0 = 3.1105e-04
Loss = 3.7635e-01, PNorm = 60.7482, GNorm = 1.7511, lr_0 = 3.1083e-04
Loss = 3.7187e-01, PNorm = 60.7539, GNorm = 1.4470, lr_0 = 3.1062e-04
Loss = 4.0661e-01, PNorm = 60.7586, GNorm = 1.1852, lr_0 = 3.1041e-04
Loss = 3.5484e-01, PNorm = 60.7681, GNorm = 1.1264, lr_0 = 3.1020e-04
Loss = 3.9880e-01, PNorm = 60.7724, GNorm = 1.7251, lr_0 = 3.0998e-04
Loss = 3.7155e-01, PNorm = 60.7749, GNorm = 1.2195, lr_0 = 3.0977e-04
Loss = 3.8789e-01, PNorm = 60.7777, GNorm = 1.4393, lr_0 = 3.0956e-04
Loss = 4.0159e-01, PNorm = 60.7829, GNorm = 1.5233, lr_0 = 3.0935e-04
Loss = 4.4348e-01, PNorm = 60.7882, GNorm = 2.7669, lr_0 = 3.0914e-04
Loss = 3.8794e-01, PNorm = 60.7911, GNorm = 1.4753, lr_0 = 3.0892e-04
Loss = 4.9816e-01, PNorm = 60.7948, GNorm = 2.1027, lr_0 = 3.0871e-04
Loss = 3.6745e-01, PNorm = 60.7981, GNorm = 1.4516, lr_0 = 3.0850e-04
Loss = 3.8553e-01, PNorm = 60.8002, GNorm = 1.4008, lr_0 = 3.0829e-04
Loss = 3.9399e-01, PNorm = 60.8013, GNorm = 1.5888, lr_0 = 3.0808e-04
Loss = 3.3307e-01, PNorm = 60.8099, GNorm = 1.4851, lr_0 = 3.0787e-04
Loss = 3.5863e-01, PNorm = 60.8118, GNorm = 1.4037, lr_0 = 3.0766e-04
Loss = 4.0968e-01, PNorm = 60.8166, GNorm = 1.7514, lr_0 = 3.0745e-04
Loss = 3.6384e-01, PNorm = 60.8189, GNorm = 1.1653, lr_0 = 3.0723e-04
Loss = 3.2467e-01, PNorm = 60.8210, GNorm = 1.5257, lr_0 = 3.0702e-04
Loss = 4.2498e-01, PNorm = 60.8240, GNorm = 1.5365, lr_0 = 3.0681e-04
Loss = 4.0319e-01, PNorm = 60.8303, GNorm = 1.2827, lr_0 = 3.0660e-04
Loss = 3.8043e-01, PNorm = 60.8362, GNorm = 1.3075, lr_0 = 3.0639e-04
Loss = 3.6082e-01, PNorm = 60.8419, GNorm = 1.3065, lr_0 = 3.0618e-04
Loss = 3.9502e-01, PNorm = 60.8487, GNorm = 1.5614, lr_0 = 3.0597e-04
Loss = 4.1533e-01, PNorm = 60.8570, GNorm = 1.4752, lr_0 = 3.0576e-04
Loss = 3.9958e-01, PNorm = 60.8590, GNorm = 1.1420, lr_0 = 3.0555e-04
Loss = 4.5130e-01, PNorm = 60.8595, GNorm = 1.3466, lr_0 = 3.0535e-04
Loss = 3.8663e-01, PNorm = 60.8635, GNorm = 1.0006, lr_0 = 3.0514e-04
Loss = 4.0608e-01, PNorm = 60.8698, GNorm = 1.4264, lr_0 = 3.0493e-04
Loss = 3.5193e-01, PNorm = 60.8727, GNorm = 1.2257, lr_0 = 3.0472e-04
Loss = 3.5555e-01, PNorm = 60.8753, GNorm = 1.7643, lr_0 = 3.0451e-04
Loss = 4.1980e-01, PNorm = 60.8783, GNorm = 1.4360, lr_0 = 3.0430e-04
Loss = 3.4916e-01, PNorm = 60.8804, GNorm = 1.2913, lr_0 = 3.0409e-04
Loss = 3.9820e-01, PNorm = 60.8869, GNorm = 1.4315, lr_0 = 3.0388e-04
Loss = 3.3510e-01, PNorm = 60.8922, GNorm = 1.6232, lr_0 = 3.0368e-04
Loss = 3.5562e-01, PNorm = 60.8993, GNorm = 1.0042, lr_0 = 3.0347e-04
Loss = 3.6473e-01, PNorm = 60.8982, GNorm = 1.2643, lr_0 = 3.0326e-04
Loss = 4.0924e-01, PNorm = 60.8972, GNorm = 1.6370, lr_0 = 3.0305e-04
Loss = 3.9638e-01, PNorm = 60.9030, GNorm = 1.8258, lr_0 = 3.0284e-04
Loss = 3.9472e-01, PNorm = 60.9090, GNorm = 1.5004, lr_0 = 3.0264e-04
Loss = 3.8994e-01, PNorm = 60.9135, GNorm = 2.0529, lr_0 = 3.0243e-04
Loss = 3.7965e-01, PNorm = 60.9157, GNorm = 1.3123, lr_0 = 3.0222e-04
Loss = 4.2125e-01, PNorm = 60.9171, GNorm = 2.4305, lr_0 = 3.0202e-04
Loss = 3.6699e-01, PNorm = 60.9198, GNorm = 1.4196, lr_0 = 3.0181e-04
Loss = 3.2917e-01, PNorm = 60.9218, GNorm = 1.3849, lr_0 = 3.0160e-04
Loss = 3.9678e-01, PNorm = 60.9243, GNorm = 1.2955, lr_0 = 3.0140e-04
Loss = 3.7155e-01, PNorm = 60.9292, GNorm = 1.0823, lr_0 = 3.0119e-04
Loss = 4.2904e-01, PNorm = 60.9338, GNorm = 1.9109, lr_0 = 3.0098e-04
Loss = 3.7511e-01, PNorm = 60.9391, GNorm = 1.8836, lr_0 = 3.0078e-04
Loss = 3.5683e-01, PNorm = 60.9397, GNorm = 1.1381, lr_0 = 3.0057e-04
Loss = 4.0425e-01, PNorm = 60.9426, GNorm = 1.6219, lr_0 = 3.0036e-04
Loss = 3.8906e-01, PNorm = 60.9426, GNorm = 1.2228, lr_0 = 3.0016e-04
Loss = 3.8601e-01, PNorm = 60.9463, GNorm = 1.4757, lr_0 = 2.9995e-04
Loss = 3.7582e-01, PNorm = 60.9487, GNorm = 1.6170, lr_0 = 2.9975e-04
Loss = 4.0430e-01, PNorm = 60.9507, GNorm = 1.0650, lr_0 = 2.9954e-04
Loss = 3.8945e-01, PNorm = 60.9551, GNorm = 1.5663, lr_0 = 2.9934e-04
Loss = 3.3963e-01, PNorm = 60.9580, GNorm = 1.6102, lr_0 = 2.9913e-04
Loss = 3.6805e-01, PNorm = 60.9599, GNorm = 1.1902, lr_0 = 2.9893e-04
Loss = 3.8632e-01, PNorm = 60.9611, GNorm = 1.6147, lr_0 = 2.9872e-04
Loss = 4.0619e-01, PNorm = 60.9660, GNorm = 1.6121, lr_0 = 2.9852e-04
Loss = 4.0282e-01, PNorm = 60.9708, GNorm = 1.3563, lr_0 = 2.9831e-04
Loss = 3.5243e-01, PNorm = 60.9746, GNorm = 1.2486, lr_0 = 2.9811e-04
Loss = 3.5453e-01, PNorm = 60.9735, GNorm = 1.1552, lr_0 = 2.9790e-04
Loss = 3.5578e-01, PNorm = 60.9787, GNorm = 1.0988, lr_0 = 2.9770e-04
Loss = 3.4759e-01, PNorm = 60.9836, GNorm = 0.9455, lr_0 = 2.9750e-04
Loss = 4.3001e-01, PNorm = 60.9865, GNorm = 1.5629, lr_0 = 2.9729e-04
Loss = 4.3690e-01, PNorm = 60.9928, GNorm = 1.9859, lr_0 = 2.9709e-04
Loss = 3.4444e-01, PNorm = 60.9956, GNorm = 1.1521, lr_0 = 2.9689e-04
Loss = 3.9387e-01, PNorm = 61.0000, GNorm = 1.5494, lr_0 = 2.9668e-04
Loss = 3.5457e-01, PNorm = 61.0041, GNorm = 1.4128, lr_0 = 2.9648e-04
Loss = 4.3405e-01, PNorm = 61.0120, GNorm = 1.7272, lr_0 = 2.9628e-04
Loss = 3.6296e-01, PNorm = 61.0173, GNorm = 1.0874, lr_0 = 2.9607e-04
Loss = 3.8543e-01, PNorm = 61.0212, GNorm = 1.2996, lr_0 = 2.9587e-04
Loss = 3.5164e-01, PNorm = 61.0195, GNorm = 1.1642, lr_0 = 2.9567e-04
Loss = 3.6663e-01, PNorm = 61.0220, GNorm = 1.5397, lr_0 = 2.9546e-04
Loss = 4.2654e-01, PNorm = 61.0263, GNorm = 1.3472, lr_0 = 2.9526e-04
Loss = 3.4645e-01, PNorm = 61.0258, GNorm = 1.9588, lr_0 = 2.9506e-04
Loss = 3.2331e-01, PNorm = 61.0306, GNorm = 1.0904, lr_0 = 2.9486e-04
Loss = 4.0263e-01, PNorm = 61.0338, GNorm = 1.5282, lr_0 = 2.9466e-04
Loss = 3.9667e-01, PNorm = 61.0376, GNorm = 1.5046, lr_0 = 2.9445e-04
Loss = 3.5756e-01, PNorm = 61.0406, GNorm = 1.3087, lr_0 = 2.9425e-04
Loss = 3.6520e-01, PNorm = 61.0436, GNorm = 2.0103, lr_0 = 2.9405e-04
Loss = 3.5527e-01, PNorm = 61.0468, GNorm = 1.6661, lr_0 = 2.9385e-04
Loss = 3.7223e-01, PNorm = 61.0495, GNorm = 1.5259, lr_0 = 2.9365e-04
Loss = 3.4328e-01, PNorm = 61.0554, GNorm = 1.3307, lr_0 = 2.9345e-04
Loss = 3.5382e-01, PNorm = 61.0570, GNorm = 1.4068, lr_0 = 2.9325e-04
Loss = 3.5791e-01, PNorm = 61.0605, GNorm = 1.5939, lr_0 = 2.9305e-04
Loss = 4.0049e-01, PNorm = 61.0656, GNorm = 0.8322, lr_0 = 2.9284e-04
Loss = 3.6848e-01, PNorm = 61.0730, GNorm = 1.1496, lr_0 = 2.9264e-04
Loss = 3.5384e-01, PNorm = 61.0762, GNorm = 1.2242, lr_0 = 2.9244e-04
Loss = 4.2271e-01, PNorm = 61.0773, GNorm = 1.1706, lr_0 = 2.9224e-04
Loss = 3.4825e-01, PNorm = 61.0803, GNorm = 1.3495, lr_0 = 2.9204e-04
Loss = 3.8006e-01, PNorm = 61.0809, GNorm = 1.3305, lr_0 = 2.9184e-04
Loss = 3.5201e-01, PNorm = 61.0859, GNorm = 0.8010, lr_0 = 2.9164e-04
Loss = 3.6478e-01, PNorm = 61.0945, GNorm = 0.9814, lr_0 = 2.9144e-04
Loss = 3.9640e-01, PNorm = 61.0966, GNorm = 1.2684, lr_0 = 2.9124e-04
Validation mae = 0.112203
Epoch 17
Loss = 3.6488e-01, PNorm = 61.1011, GNorm = 1.3631, lr_0 = 2.9104e-04
Loss = 3.2062e-01, PNorm = 61.1061, GNorm = 1.5435, lr_0 = 2.9084e-04
Loss = 3.8674e-01, PNorm = 61.1080, GNorm = 1.2591, lr_0 = 2.9065e-04
Loss = 3.9874e-01, PNorm = 61.1097, GNorm = 1.7818, lr_0 = 2.9045e-04
Loss = 3.7195e-01, PNorm = 61.1129, GNorm = 1.1280, lr_0 = 2.9025e-04
Loss = 3.7545e-01, PNorm = 61.1168, GNorm = 2.0411, lr_0 = 2.9005e-04
Loss = 3.2255e-01, PNorm = 61.1214, GNorm = 0.9623, lr_0 = 2.8985e-04
Loss = 3.6339e-01, PNorm = 61.1262, GNorm = 1.2204, lr_0 = 2.8965e-04
Loss = 4.2520e-01, PNorm = 61.1321, GNorm = 1.2253, lr_0 = 2.8945e-04
Loss = 3.2580e-01, PNorm = 61.1360, GNorm = 1.4548, lr_0 = 2.8925e-04
Loss = 3.2806e-01, PNorm = 61.1396, GNorm = 1.2649, lr_0 = 2.8906e-04
Loss = 4.0030e-01, PNorm = 61.1447, GNorm = 1.3547, lr_0 = 2.8886e-04
Loss = 3.9381e-01, PNorm = 61.1450, GNorm = 1.7896, lr_0 = 2.8866e-04
Loss = 3.4738e-01, PNorm = 61.1499, GNorm = 1.2656, lr_0 = 2.8846e-04
Loss = 4.1402e-01, PNorm = 61.1497, GNorm = 1.6510, lr_0 = 2.8826e-04
Loss = 3.2811e-01, PNorm = 61.1502, GNorm = 1.0452, lr_0 = 2.8807e-04
Loss = 3.7790e-01, PNorm = 61.1550, GNorm = 1.2426, lr_0 = 2.8787e-04
Loss = 3.9116e-01, PNorm = 61.1580, GNorm = 1.8509, lr_0 = 2.8767e-04
Loss = 3.5629e-01, PNorm = 61.1605, GNorm = 1.0711, lr_0 = 2.8748e-04
Loss = 3.6028e-01, PNorm = 61.1604, GNorm = 1.3621, lr_0 = 2.8728e-04
Loss = 4.2001e-01, PNorm = 61.1627, GNorm = 1.6529, lr_0 = 2.8708e-04
Loss = 3.8858e-01, PNorm = 61.1663, GNorm = 1.3379, lr_0 = 2.8689e-04
Loss = 3.7890e-01, PNorm = 61.1739, GNorm = 1.9642, lr_0 = 2.8669e-04
Loss = 3.4501e-01, PNorm = 61.1775, GNorm = 1.0526, lr_0 = 2.8649e-04
Loss = 3.6320e-01, PNorm = 61.1791, GNorm = 1.5312, lr_0 = 2.8630e-04
Loss = 3.0083e-01, PNorm = 61.1820, GNorm = 1.4482, lr_0 = 2.8610e-04
Loss = 4.0360e-01, PNorm = 61.1820, GNorm = 1.8416, lr_0 = 2.8590e-04
Loss = 4.1088e-01, PNorm = 61.1877, GNorm = 1.7180, lr_0 = 2.8571e-04
Loss = 3.8954e-01, PNorm = 61.1909, GNorm = 1.6100, lr_0 = 2.8551e-04
Loss = 3.2515e-01, PNorm = 61.1942, GNorm = 1.0158, lr_0 = 2.8532e-04
Loss = 3.3646e-01, PNorm = 61.1980, GNorm = 1.1449, lr_0 = 2.8512e-04
Loss = 3.3195e-01, PNorm = 61.2028, GNorm = 1.1213, lr_0 = 2.8493e-04
Loss = 3.8100e-01, PNorm = 61.2061, GNorm = 1.6281, lr_0 = 2.8473e-04
Loss = 4.0359e-01, PNorm = 61.2079, GNorm = 1.4217, lr_0 = 2.8454e-04
Loss = 3.5858e-01, PNorm = 61.2093, GNorm = 1.3151, lr_0 = 2.8434e-04
Loss = 3.9202e-01, PNorm = 61.2119, GNorm = 1.6093, lr_0 = 2.8415e-04
Loss = 3.9287e-01, PNorm = 61.2178, GNorm = 1.4611, lr_0 = 2.8395e-04
Loss = 3.5426e-01, PNorm = 61.2216, GNorm = 1.5304, lr_0 = 2.8376e-04
Loss = 3.9004e-01, PNorm = 61.2221, GNorm = 1.4707, lr_0 = 2.8356e-04
Loss = 4.0654e-01, PNorm = 61.2262, GNorm = 1.6872, lr_0 = 2.8337e-04
Loss = 4.1396e-01, PNorm = 61.2295, GNorm = 1.5299, lr_0 = 2.8317e-04
Loss = 4.2087e-01, PNorm = 61.2331, GNorm = 1.8594, lr_0 = 2.8298e-04
Loss = 5.1315e-01, PNorm = 61.2374, GNorm = 1.3824, lr_0 = 2.8279e-04
Loss = 4.0441e-01, PNorm = 61.2457, GNorm = 1.1527, lr_0 = 2.8259e-04
Loss = 3.8972e-01, PNorm = 61.2494, GNorm = 1.7803, lr_0 = 2.8240e-04
Loss = 3.5978e-01, PNorm = 61.2516, GNorm = 1.3886, lr_0 = 2.8221e-04
Loss = 3.4008e-01, PNorm = 61.2515, GNorm = 1.8143, lr_0 = 2.8201e-04
Loss = 3.8007e-01, PNorm = 61.2520, GNorm = 1.3307, lr_0 = 2.8182e-04
Loss = 3.5986e-01, PNorm = 61.2569, GNorm = 2.0264, lr_0 = 2.8163e-04
Loss = 3.9106e-01, PNorm = 61.2639, GNorm = 2.1029, lr_0 = 2.8143e-04
Loss = 3.3469e-01, PNorm = 61.2658, GNorm = 1.1504, lr_0 = 2.8124e-04
Loss = 3.9328e-01, PNorm = 61.2688, GNorm = 1.1705, lr_0 = 2.8105e-04
Loss = 3.7277e-01, PNorm = 61.2721, GNorm = 2.7442, lr_0 = 2.8085e-04
Loss = 3.9294e-01, PNorm = 61.2730, GNorm = 1.6236, lr_0 = 2.8066e-04
Loss = 3.4289e-01, PNorm = 61.2763, GNorm = 1.1296, lr_0 = 2.8047e-04
Loss = 3.8432e-01, PNorm = 61.2796, GNorm = 1.4059, lr_0 = 2.8028e-04
Loss = 4.1363e-01, PNorm = 61.2848, GNorm = 1.3318, lr_0 = 2.8009e-04
Loss = 3.1279e-01, PNorm = 61.2856, GNorm = 1.4634, lr_0 = 2.7989e-04
Loss = 3.7874e-01, PNorm = 61.2875, GNorm = 1.2861, lr_0 = 2.7970e-04
Loss = 3.9609e-01, PNorm = 61.2907, GNorm = 1.2559, lr_0 = 2.7951e-04
Loss = 4.0751e-01, PNorm = 61.2940, GNorm = 1.7267, lr_0 = 2.7932e-04
Loss = 4.0084e-01, PNorm = 61.2995, GNorm = 1.3129, lr_0 = 2.7913e-04
Loss = 3.5724e-01, PNorm = 61.3041, GNorm = 2.4481, lr_0 = 2.7894e-04
Loss = 3.9293e-01, PNorm = 61.3054, GNorm = 1.3094, lr_0 = 2.7875e-04
Loss = 3.1771e-01, PNorm = 61.3092, GNorm = 1.2405, lr_0 = 2.7855e-04
Loss = 4.1384e-01, PNorm = 61.3067, GNorm = 1.9304, lr_0 = 2.7836e-04
Loss = 3.6793e-01, PNorm = 61.3134, GNorm = 1.5478, lr_0 = 2.7817e-04
Loss = 3.8348e-01, PNorm = 61.3162, GNorm = 1.4314, lr_0 = 2.7798e-04
Loss = 3.5068e-01, PNorm = 61.3209, GNorm = 1.3028, lr_0 = 2.7779e-04
Loss = 3.7818e-01, PNorm = 61.3241, GNorm = 1.1674, lr_0 = 2.7760e-04
Loss = 3.8124e-01, PNorm = 61.3306, GNorm = 1.2717, lr_0 = 2.7741e-04
Loss = 3.9665e-01, PNorm = 61.3370, GNorm = 1.4081, lr_0 = 2.7722e-04
Loss = 3.9164e-01, PNorm = 61.3408, GNorm = 1.5590, lr_0 = 2.7703e-04
Loss = 3.8267e-01, PNorm = 61.3428, GNorm = 1.3845, lr_0 = 2.7684e-04
Loss = 3.5053e-01, PNorm = 61.3499, GNorm = 1.0309, lr_0 = 2.7665e-04
Loss = 3.5039e-01, PNorm = 61.3555, GNorm = 1.4027, lr_0 = 2.7646e-04
Loss = 3.8430e-01, PNorm = 61.3564, GNorm = 1.8955, lr_0 = 2.7627e-04
Loss = 3.8293e-01, PNorm = 61.3558, GNorm = 1.4180, lr_0 = 2.7608e-04
Loss = 3.2373e-01, PNorm = 61.3574, GNorm = 1.1053, lr_0 = 2.7590e-04
Loss = 4.0919e-01, PNorm = 61.3606, GNorm = 1.4022, lr_0 = 2.7571e-04
Loss = 3.7372e-01, PNorm = 61.3658, GNorm = 1.2418, lr_0 = 2.7552e-04
Loss = 3.4386e-01, PNorm = 61.3696, GNorm = 1.5989, lr_0 = 2.7533e-04
Loss = 3.7641e-01, PNorm = 61.3722, GNorm = 1.7735, lr_0 = 2.7514e-04
Loss = 3.1362e-01, PNorm = 61.3774, GNorm = 1.3655, lr_0 = 2.7495e-04
Loss = 3.5356e-01, PNorm = 61.3835, GNorm = 1.0977, lr_0 = 2.7476e-04
Loss = 3.4458e-01, PNorm = 61.3867, GNorm = 1.6856, lr_0 = 2.7457e-04
Loss = 3.5760e-01, PNorm = 61.3882, GNorm = 1.5233, lr_0 = 2.7439e-04
Loss = 3.8872e-01, PNorm = 61.3920, GNorm = 1.1544, lr_0 = 2.7420e-04
Loss = 4.1329e-01, PNorm = 61.3918, GNorm = 1.3710, lr_0 = 2.7401e-04
Loss = 4.0502e-01, PNorm = 61.3963, GNorm = 1.7088, lr_0 = 2.7382e-04
Loss = 3.6353e-01, PNorm = 61.4020, GNorm = 1.3253, lr_0 = 2.7364e-04
Loss = 3.6326e-01, PNorm = 61.4033, GNorm = 1.4267, lr_0 = 2.7345e-04
Loss = 3.4693e-01, PNorm = 61.4061, GNorm = 1.8876, lr_0 = 2.7326e-04
Loss = 3.9287e-01, PNorm = 61.4067, GNorm = 1.2343, lr_0 = 2.7307e-04
Loss = 4.2236e-01, PNorm = 61.4156, GNorm = 1.1032, lr_0 = 2.7289e-04
Loss = 3.8411e-01, PNorm = 61.4182, GNorm = 1.8492, lr_0 = 2.7270e-04
Loss = 3.4772e-01, PNorm = 61.4196, GNorm = 1.5388, lr_0 = 2.7251e-04
Loss = 3.6818e-01, PNorm = 61.4218, GNorm = 1.6363, lr_0 = 2.7233e-04
Loss = 3.8374e-01, PNorm = 61.4264, GNorm = 1.5553, lr_0 = 2.7214e-04
Loss = 3.7824e-01, PNorm = 61.4320, GNorm = 1.2479, lr_0 = 2.7195e-04
Loss = 3.8685e-01, PNorm = 61.4344, GNorm = 1.3616, lr_0 = 2.7177e-04
Loss = 3.8288e-01, PNorm = 61.4386, GNorm = 1.1834, lr_0 = 2.7158e-04
Loss = 3.9751e-01, PNorm = 61.4379, GNorm = 1.6929, lr_0 = 2.7139e-04
Loss = 3.6280e-01, PNorm = 61.4397, GNorm = 1.6597, lr_0 = 2.7121e-04
Loss = 4.0312e-01, PNorm = 61.4444, GNorm = 1.4872, lr_0 = 2.7102e-04
Loss = 4.1358e-01, PNorm = 61.4458, GNorm = 1.3800, lr_0 = 2.7084e-04
Loss = 4.2608e-01, PNorm = 61.4515, GNorm = 1.5938, lr_0 = 2.7065e-04
Loss = 3.7529e-01, PNorm = 61.4529, GNorm = 1.7143, lr_0 = 2.7047e-04
Loss = 3.4768e-01, PNorm = 61.4601, GNorm = 1.4001, lr_0 = 2.7028e-04
Loss = 3.7822e-01, PNorm = 61.4630, GNorm = 1.3517, lr_0 = 2.7010e-04
Loss = 4.2713e-01, PNorm = 61.4673, GNorm = 1.3352, lr_0 = 2.6991e-04
Loss = 3.4541e-01, PNorm = 61.4711, GNorm = 1.1385, lr_0 = 2.6973e-04
Loss = 4.0344e-01, PNorm = 61.4779, GNorm = 1.2441, lr_0 = 2.6954e-04
Loss = 3.6885e-01, PNorm = 61.4814, GNorm = 1.6572, lr_0 = 2.6936e-04
Loss = 3.5776e-01, PNorm = 61.4844, GNorm = 1.2419, lr_0 = 2.6917e-04
Loss = 4.0732e-01, PNorm = 61.4885, GNorm = 1.2898, lr_0 = 2.6899e-04
Loss = 3.8592e-01, PNorm = 61.4917, GNorm = 1.3398, lr_0 = 2.6880e-04
Loss = 3.8293e-01, PNorm = 61.4912, GNorm = 1.2492, lr_0 = 2.6862e-04
Loss = 4.1227e-01, PNorm = 61.4964, GNorm = 1.3740, lr_0 = 2.6844e-04
Loss = 3.4111e-01, PNorm = 61.4985, GNorm = 1.4517, lr_0 = 2.6825e-04
Validation mae = 0.112011
Epoch 18
Loss = 3.7555e-01, PNorm = 61.5018, GNorm = 1.0627, lr_0 = 2.6807e-04
Loss = 3.8228e-01, PNorm = 61.5062, GNorm = 1.3149, lr_0 = 2.6788e-04
Loss = 3.5376e-01, PNorm = 61.5068, GNorm = 1.1993, lr_0 = 2.6770e-04
Loss = 3.9193e-01, PNorm = 61.5103, GNorm = 1.1186, lr_0 = 2.6752e-04
Loss = 3.8789e-01, PNorm = 61.5137, GNorm = 2.3439, lr_0 = 2.6733e-04
Loss = 3.8185e-01, PNorm = 61.5179, GNorm = 1.4579, lr_0 = 2.6715e-04
Loss = 3.4424e-01, PNorm = 61.5210, GNorm = 1.1880, lr_0 = 2.6697e-04
Loss = 3.8205e-01, PNorm = 61.5267, GNorm = 1.6608, lr_0 = 2.6678e-04
Loss = 3.5015e-01, PNorm = 61.5319, GNorm = 1.2316, lr_0 = 2.6660e-04
Loss = 4.2547e-01, PNorm = 61.5339, GNorm = 1.8537, lr_0 = 2.6642e-04
Loss = 4.1883e-01, PNorm = 61.5328, GNorm = 1.2497, lr_0 = 2.6624e-04
Loss = 3.7025e-01, PNorm = 61.5360, GNorm = 1.2597, lr_0 = 2.6605e-04
Loss = 3.3056e-01, PNorm = 61.5419, GNorm = 1.8312, lr_0 = 2.6587e-04
Loss = 3.8342e-01, PNorm = 61.5430, GNorm = 1.2499, lr_0 = 2.6569e-04
Loss = 3.9414e-01, PNorm = 61.5448, GNorm = 1.3624, lr_0 = 2.6551e-04
Loss = 3.7763e-01, PNorm = 61.5468, GNorm = 2.3569, lr_0 = 2.6533e-04
Loss = 3.2534e-01, PNorm = 61.5525, GNorm = 1.2790, lr_0 = 2.6514e-04
Loss = 4.1829e-01, PNorm = 61.5620, GNorm = 1.3642, lr_0 = 2.6496e-04
Loss = 3.3702e-01, PNorm = 61.5651, GNorm = 1.4247, lr_0 = 2.6478e-04
Loss = 3.5477e-01, PNorm = 61.5666, GNorm = 1.5770, lr_0 = 2.6460e-04
Loss = 3.6362e-01, PNorm = 61.5703, GNorm = 1.5829, lr_0 = 2.6442e-04
Loss = 3.6939e-01, PNorm = 61.5720, GNorm = 1.1685, lr_0 = 2.6424e-04
Loss = 3.6720e-01, PNorm = 61.5787, GNorm = 1.7140, lr_0 = 2.6406e-04
Loss = 3.3067e-01, PNorm = 61.5810, GNorm = 1.1659, lr_0 = 2.6388e-04
Loss = 3.3925e-01, PNorm = 61.5876, GNorm = 0.8304, lr_0 = 2.6369e-04
Loss = 3.9384e-01, PNorm = 61.5918, GNorm = 1.5665, lr_0 = 2.6351e-04
Loss = 3.4576e-01, PNorm = 61.5947, GNorm = 1.3260, lr_0 = 2.6333e-04
Loss = 3.4607e-01, PNorm = 61.5982, GNorm = 1.4222, lr_0 = 2.6315e-04
Loss = 3.9159e-01, PNorm = 61.5995, GNorm = 1.4266, lr_0 = 2.6297e-04
Loss = 3.9372e-01, PNorm = 61.6036, GNorm = 1.4135, lr_0 = 2.6279e-04
Loss = 3.3819e-01, PNorm = 61.6069, GNorm = 1.4553, lr_0 = 2.6261e-04
Loss = 4.4221e-01, PNorm = 61.6135, GNorm = 1.9367, lr_0 = 2.6243e-04
Loss = 3.3308e-01, PNorm = 61.6161, GNorm = 1.2447, lr_0 = 2.6225e-04
Loss = 3.7403e-01, PNorm = 61.6217, GNorm = 1.3761, lr_0 = 2.6207e-04
Loss = 3.3003e-01, PNorm = 61.6271, GNorm = 2.1892, lr_0 = 2.6189e-04
Loss = 3.5218e-01, PNorm = 61.6261, GNorm = 1.3373, lr_0 = 2.6171e-04
Loss = 3.7222e-01, PNorm = 61.6298, GNorm = 1.7647, lr_0 = 2.6153e-04
Loss = 3.6106e-01, PNorm = 61.6334, GNorm = 1.4811, lr_0 = 2.6136e-04
Loss = 3.9186e-01, PNorm = 61.6350, GNorm = 1.4199, lr_0 = 2.6118e-04
Loss = 4.2039e-01, PNorm = 61.6364, GNorm = 1.0946, lr_0 = 2.6100e-04
Loss = 3.4409e-01, PNorm = 61.6401, GNorm = 1.9681, lr_0 = 2.6082e-04
Loss = 3.6240e-01, PNorm = 61.6409, GNorm = 1.2940, lr_0 = 2.6064e-04
Loss = 3.4330e-01, PNorm = 61.6443, GNorm = 1.3897, lr_0 = 2.6046e-04
Loss = 3.3671e-01, PNorm = 61.6457, GNorm = 1.5049, lr_0 = 2.6028e-04
Loss = 3.7611e-01, PNorm = 61.6479, GNorm = 1.5210, lr_0 = 2.6011e-04
Loss = 4.0822e-01, PNorm = 61.6518, GNorm = 1.5587, lr_0 = 2.5993e-04
Loss = 4.0029e-01, PNorm = 61.6545, GNorm = 1.5645, lr_0 = 2.5975e-04
Loss = 4.0254e-01, PNorm = 61.6571, GNorm = 1.5626, lr_0 = 2.5957e-04
Loss = 3.6321e-01, PNorm = 61.6634, GNorm = 2.2310, lr_0 = 2.5939e-04
Loss = 3.1850e-01, PNorm = 61.6694, GNorm = 1.3165, lr_0 = 2.5922e-04
Loss = 3.6276e-01, PNorm = 61.6697, GNorm = 1.2917, lr_0 = 2.5904e-04
Loss = 4.0073e-01, PNorm = 61.6709, GNorm = 1.1349, lr_0 = 2.5886e-04
Loss = 3.5468e-01, PNorm = 61.6761, GNorm = 1.5994, lr_0 = 2.5868e-04
Loss = 3.9992e-01, PNorm = 61.6801, GNorm = 1.9996, lr_0 = 2.5851e-04
Loss = 3.8934e-01, PNorm = 61.6855, GNorm = 1.2883, lr_0 = 2.5833e-04
Loss = 3.7926e-01, PNorm = 61.6880, GNorm = 1.4328, lr_0 = 2.5815e-04
Loss = 3.3945e-01, PNorm = 61.6881, GNorm = 0.9987, lr_0 = 2.5797e-04
Loss = 3.6561e-01, PNorm = 61.6903, GNorm = 1.1417, lr_0 = 2.5780e-04
Loss = 3.2963e-01, PNorm = 61.6950, GNorm = 1.6012, lr_0 = 2.5762e-04
Loss = 3.9695e-01, PNorm = 61.7007, GNorm = 1.5689, lr_0 = 2.5745e-04
Loss = 4.3780e-01, PNorm = 61.6981, GNorm = 1.4444, lr_0 = 2.5727e-04
Loss = 4.1163e-01, PNorm = 61.6995, GNorm = 1.6011, lr_0 = 2.5709e-04
Loss = 3.9861e-01, PNorm = 61.7000, GNorm = 2.1095, lr_0 = 2.5692e-04
Loss = 4.2998e-01, PNorm = 61.7069, GNorm = 1.2781, lr_0 = 2.5674e-04
Loss = 3.8383e-01, PNorm = 61.7070, GNorm = 1.4697, lr_0 = 2.5656e-04
Loss = 3.4485e-01, PNorm = 61.7083, GNorm = 1.6811, lr_0 = 2.5639e-04
Loss = 3.7431e-01, PNorm = 61.7105, GNorm = 1.3259, lr_0 = 2.5621e-04
Loss = 3.5192e-01, PNorm = 61.7137, GNorm = 1.6674, lr_0 = 2.5604e-04
Loss = 3.6636e-01, PNorm = 61.7174, GNorm = 2.0782, lr_0 = 2.5586e-04
Loss = 3.3249e-01, PNorm = 61.7196, GNorm = 1.3117, lr_0 = 2.5569e-04
Loss = 4.1746e-01, PNorm = 61.7212, GNorm = 1.5950, lr_0 = 2.5551e-04
Loss = 3.1387e-01, PNorm = 61.7225, GNorm = 1.7109, lr_0 = 2.5534e-04
Loss = 4.0553e-01, PNorm = 61.7254, GNorm = 1.1667, lr_0 = 2.5516e-04
Loss = 3.5743e-01, PNorm = 61.7309, GNorm = 1.2057, lr_0 = 2.5499e-04
Loss = 3.7907e-01, PNorm = 61.7326, GNorm = 1.1771, lr_0 = 2.5481e-04
Loss = 3.3112e-01, PNorm = 61.7336, GNorm = 1.4986, lr_0 = 2.5464e-04
Loss = 3.7431e-01, PNorm = 61.7353, GNorm = 0.9550, lr_0 = 2.5446e-04
Loss = 3.9041e-01, PNorm = 61.7392, GNorm = 1.5469, lr_0 = 2.5429e-04
Loss = 4.2124e-01, PNorm = 61.7421, GNorm = 2.1370, lr_0 = 2.5411e-04
Loss = 3.5754e-01, PNorm = 61.7484, GNorm = 1.1355, lr_0 = 2.5394e-04
Loss = 3.7901e-01, PNorm = 61.7498, GNorm = 1.1601, lr_0 = 2.5377e-04
Loss = 3.7669e-01, PNorm = 61.7512, GNorm = 1.5874, lr_0 = 2.5359e-04
Loss = 3.9697e-01, PNorm = 61.7572, GNorm = 1.3670, lr_0 = 2.5342e-04
Loss = 3.7617e-01, PNorm = 61.7578, GNorm = 1.3008, lr_0 = 2.5325e-04
Loss = 4.0321e-01, PNorm = 61.7578, GNorm = 1.7055, lr_0 = 2.5307e-04
Loss = 3.5985e-01, PNorm = 61.7589, GNorm = 1.2228, lr_0 = 2.5290e-04
Loss = 4.0157e-01, PNorm = 61.7625, GNorm = 1.3560, lr_0 = 2.5273e-04
Loss = 3.4434e-01, PNorm = 61.7657, GNorm = 1.4626, lr_0 = 2.5255e-04
Loss = 3.8883e-01, PNorm = 61.7685, GNorm = 1.9462, lr_0 = 2.5238e-04
Loss = 3.5821e-01, PNorm = 61.7709, GNorm = 1.3594, lr_0 = 2.5221e-04
Loss = 3.5253e-01, PNorm = 61.7724, GNorm = 1.1303, lr_0 = 2.5203e-04
Loss = 3.4505e-01, PNorm = 61.7759, GNorm = 1.1733, lr_0 = 2.5186e-04
Loss = 3.2942e-01, PNorm = 61.7794, GNorm = 1.0634, lr_0 = 2.5169e-04
Loss = 3.7186e-01, PNorm = 61.7839, GNorm = 1.7693, lr_0 = 2.5152e-04
Loss = 3.8001e-01, PNorm = 61.7870, GNorm = 1.4177, lr_0 = 2.5134e-04
Loss = 3.9163e-01, PNorm = 61.7904, GNorm = 1.6172, lr_0 = 2.5117e-04
Loss = 4.0292e-01, PNorm = 61.7929, GNorm = 1.3534, lr_0 = 2.5100e-04
Loss = 3.1109e-01, PNorm = 61.7982, GNorm = 1.1536, lr_0 = 2.5083e-04
Loss = 4.1378e-01, PNorm = 61.7999, GNorm = 1.2088, lr_0 = 2.5066e-04
Loss = 3.5318e-01, PNorm = 61.8017, GNorm = 1.5846, lr_0 = 2.5048e-04
Loss = 3.7859e-01, PNorm = 61.8034, GNorm = 1.3295, lr_0 = 2.5031e-04
Loss = 3.3127e-01, PNorm = 61.8053, GNorm = 1.6470, lr_0 = 2.5014e-04
Loss = 3.8563e-01, PNorm = 61.8112, GNorm = 1.4904, lr_0 = 2.4997e-04
Loss = 3.9552e-01, PNorm = 61.8125, GNorm = 1.8761, lr_0 = 2.4980e-04
Loss = 3.6382e-01, PNorm = 61.8162, GNorm = 1.7765, lr_0 = 2.4963e-04
Loss = 3.6656e-01, PNorm = 61.8203, GNorm = 1.3940, lr_0 = 2.4946e-04
Loss = 3.8452e-01, PNorm = 61.8229, GNorm = 1.5004, lr_0 = 2.4929e-04
Loss = 4.0503e-01, PNorm = 61.8276, GNorm = 1.4639, lr_0 = 2.4911e-04
Loss = 3.4807e-01, PNorm = 61.8286, GNorm = 1.0940, lr_0 = 2.4894e-04
Loss = 3.2647e-01, PNorm = 61.8297, GNorm = 1.6539, lr_0 = 2.4877e-04
Loss = 3.7753e-01, PNorm = 61.8329, GNorm = 1.5818, lr_0 = 2.4860e-04
Loss = 3.9432e-01, PNorm = 61.8352, GNorm = 1.4994, lr_0 = 2.4843e-04
Loss = 3.7110e-01, PNorm = 61.8408, GNorm = 1.2729, lr_0 = 2.4826e-04
Loss = 3.5659e-01, PNorm = 61.8433, GNorm = 1.4324, lr_0 = 2.4809e-04
Loss = 3.7089e-01, PNorm = 61.8453, GNorm = 1.4077, lr_0 = 2.4792e-04
Loss = 3.4518e-01, PNorm = 61.8483, GNorm = 1.7133, lr_0 = 2.4775e-04
Loss = 3.7937e-01, PNorm = 61.8479, GNorm = 1.8356, lr_0 = 2.4758e-04
Loss = 3.5881e-01, PNorm = 61.8558, GNorm = 1.2581, lr_0 = 2.4741e-04
Loss = 3.7077e-01, PNorm = 61.8579, GNorm = 1.2150, lr_0 = 2.4724e-04
Loss = 3.5522e-01, PNorm = 61.8596, GNorm = 2.5777, lr_0 = 2.4707e-04
Validation mae = 0.111731
Epoch 19
Loss = 3.4730e-01, PNorm = 61.8621, GNorm = 2.0795, lr_0 = 2.4690e-04
Loss = 3.4912e-01, PNorm = 61.8667, GNorm = 0.9342, lr_0 = 2.4674e-04
Loss = 3.6601e-01, PNorm = 61.8725, GNorm = 1.9906, lr_0 = 2.4657e-04
Loss = 3.4702e-01, PNorm = 61.8751, GNorm = 2.1779, lr_0 = 2.4640e-04
Loss = 3.4833e-01, PNorm = 61.8823, GNorm = 1.2094, lr_0 = 2.4623e-04
Loss = 3.6151e-01, PNorm = 61.8832, GNorm = 1.6854, lr_0 = 2.4606e-04
Loss = 3.7050e-01, PNorm = 61.8851, GNorm = 1.2902, lr_0 = 2.4589e-04
Loss = 3.3876e-01, PNorm = 61.8864, GNorm = 1.3448, lr_0 = 2.4572e-04
Loss = 3.1894e-01, PNorm = 61.8909, GNorm = 2.0639, lr_0 = 2.4556e-04
Loss = 3.3708e-01, PNorm = 61.8928, GNorm = 1.4880, lr_0 = 2.4539e-04
Loss = 3.4210e-01, PNorm = 61.8962, GNorm = 1.5104, lr_0 = 2.4522e-04
Loss = 3.5491e-01, PNorm = 61.8949, GNorm = 1.7695, lr_0 = 2.4505e-04
Loss = 2.9919e-01, PNorm = 61.8988, GNorm = 1.1820, lr_0 = 2.4488e-04
Loss = 3.7847e-01, PNorm = 61.8985, GNorm = 3.2358, lr_0 = 2.4472e-04
Loss = 3.9437e-01, PNorm = 61.9001, GNorm = 2.1292, lr_0 = 2.4455e-04
Loss = 3.6449e-01, PNorm = 61.9052, GNorm = 1.2259, lr_0 = 2.4438e-04
Loss = 3.9969e-01, PNorm = 61.9113, GNorm = 1.1075, lr_0 = 2.4421e-04
Loss = 3.6428e-01, PNorm = 61.9174, GNorm = 1.5926, lr_0 = 2.4405e-04
Loss = 3.3019e-01, PNorm = 61.9197, GNorm = 1.1154, lr_0 = 2.4388e-04
Loss = 3.5353e-01, PNorm = 61.9191, GNorm = 1.6914, lr_0 = 2.4371e-04
Loss = 3.4430e-01, PNorm = 61.9190, GNorm = 1.7585, lr_0 = 2.4354e-04
Loss = 3.0487e-01, PNorm = 61.9211, GNorm = 1.4331, lr_0 = 2.4338e-04
Loss = 3.6183e-01, PNorm = 61.9238, GNorm = 1.1123, lr_0 = 2.4321e-04
Loss = 3.8526e-01, PNorm = 61.9280, GNorm = 1.6194, lr_0 = 2.4304e-04
Loss = 3.6296e-01, PNorm = 61.9307, GNorm = 1.7771, lr_0 = 2.4288e-04
Loss = 3.5506e-01, PNorm = 61.9342, GNorm = 1.3277, lr_0 = 2.4271e-04
Loss = 4.2082e-01, PNorm = 61.9353, GNorm = 1.5425, lr_0 = 2.4254e-04
Loss = 3.4952e-01, PNorm = 61.9371, GNorm = 1.4568, lr_0 = 2.4238e-04
Loss = 3.8942e-01, PNorm = 61.9407, GNorm = 1.5341, lr_0 = 2.4221e-04
Loss = 3.5816e-01, PNorm = 61.9435, GNorm = 1.5679, lr_0 = 2.4205e-04
Loss = 3.9251e-01, PNorm = 61.9493, GNorm = 1.7044, lr_0 = 2.4188e-04
Loss = 3.7979e-01, PNorm = 61.9527, GNorm = 1.7097, lr_0 = 2.4171e-04
Loss = 3.6622e-01, PNorm = 61.9535, GNorm = 1.1961, lr_0 = 2.4155e-04
Loss = 3.9703e-01, PNorm = 61.9550, GNorm = 2.1508, lr_0 = 2.4138e-04
Loss = 3.3918e-01, PNorm = 61.9627, GNorm = 1.7323, lr_0 = 2.4122e-04
Loss = 3.4220e-01, PNorm = 61.9644, GNorm = 1.5761, lr_0 = 2.4105e-04
Loss = 3.3150e-01, PNorm = 61.9682, GNorm = 1.0270, lr_0 = 2.4089e-04
Loss = 3.7279e-01, PNorm = 61.9705, GNorm = 1.3617, lr_0 = 2.4072e-04
Loss = 3.8333e-01, PNorm = 61.9710, GNorm = 1.4056, lr_0 = 2.4056e-04
Loss = 3.6821e-01, PNorm = 61.9735, GNorm = 1.3864, lr_0 = 2.4039e-04
Loss = 4.5424e-01, PNorm = 61.9738, GNorm = 1.2552, lr_0 = 2.4023e-04
Loss = 3.2180e-01, PNorm = 61.9763, GNorm = 1.7662, lr_0 = 2.4006e-04
Loss = 3.6827e-01, PNorm = 61.9778, GNorm = 1.4450, lr_0 = 2.3990e-04
Loss = 3.5796e-01, PNorm = 61.9816, GNorm = 1.3495, lr_0 = 2.3974e-04
Loss = 4.3919e-01, PNorm = 61.9832, GNorm = 1.4164, lr_0 = 2.3957e-04
Loss = 3.8330e-01, PNorm = 61.9871, GNorm = 1.4020, lr_0 = 2.3941e-04
Loss = 3.3221e-01, PNorm = 61.9920, GNorm = 1.3969, lr_0 = 2.3924e-04
Loss = 3.9839e-01, PNorm = 61.9929, GNorm = 1.3868, lr_0 = 2.3908e-04
Loss = 4.1310e-01, PNorm = 61.9943, GNorm = 2.0786, lr_0 = 2.3892e-04
Loss = 4.0851e-01, PNorm = 61.9994, GNorm = 1.2697, lr_0 = 2.3875e-04
Loss = 3.7803e-01, PNorm = 61.9994, GNorm = 1.0034, lr_0 = 2.3859e-04
Loss = 3.0837e-01, PNorm = 62.0023, GNorm = 1.1173, lr_0 = 2.3842e-04
Loss = 3.7460e-01, PNorm = 62.0050, GNorm = 1.9351, lr_0 = 2.3826e-04
Loss = 3.8355e-01, PNorm = 62.0076, GNorm = 1.3434, lr_0 = 2.3810e-04
Loss = 3.5579e-01, PNorm = 62.0089, GNorm = 1.6334, lr_0 = 2.3794e-04
Loss = 3.3938e-01, PNorm = 62.0129, GNorm = 1.8772, lr_0 = 2.3777e-04
Loss = 3.7507e-01, PNorm = 62.0169, GNorm = 0.9476, lr_0 = 2.3761e-04
Loss = 3.6074e-01, PNorm = 62.0193, GNorm = 1.5468, lr_0 = 2.3745e-04
Loss = 3.3655e-01, PNorm = 62.0217, GNorm = 1.3219, lr_0 = 2.3728e-04
Loss = 3.9667e-01, PNorm = 62.0242, GNorm = 1.2623, lr_0 = 2.3712e-04
Loss = 4.0238e-01, PNorm = 62.0263, GNorm = 1.6248, lr_0 = 2.3696e-04
Loss = 4.0114e-01, PNorm = 62.0292, GNorm = 1.7639, lr_0 = 2.3680e-04
Loss = 3.7662e-01, PNorm = 62.0308, GNorm = 1.8245, lr_0 = 2.3663e-04
Loss = 4.1165e-01, PNorm = 62.0352, GNorm = 1.2505, lr_0 = 2.3647e-04
Loss = 3.5469e-01, PNorm = 62.0400, GNorm = 1.1641, lr_0 = 2.3631e-04
Loss = 4.1276e-01, PNorm = 62.0395, GNorm = 1.6210, lr_0 = 2.3615e-04
Loss = 3.9260e-01, PNorm = 62.0397, GNorm = 1.2105, lr_0 = 2.3599e-04
Loss = 4.0734e-01, PNorm = 62.0431, GNorm = 1.5616, lr_0 = 2.3582e-04
Loss = 3.5670e-01, PNorm = 62.0456, GNorm = 1.3816, lr_0 = 2.3566e-04
Loss = 4.0799e-01, PNorm = 62.0489, GNorm = 1.2556, lr_0 = 2.3550e-04
Loss = 3.6800e-01, PNorm = 62.0523, GNorm = 1.5830, lr_0 = 2.3534e-04
Loss = 3.2405e-01, PNorm = 62.0545, GNorm = 1.1000, lr_0 = 2.3518e-04
Loss = 3.5807e-01, PNorm = 62.0565, GNorm = 2.3467, lr_0 = 2.3502e-04
Loss = 3.6821e-01, PNorm = 62.0577, GNorm = 1.6437, lr_0 = 2.3486e-04
Loss = 4.1216e-01, PNorm = 62.0609, GNorm = 1.2801, lr_0 = 2.3470e-04
Loss = 3.4986e-01, PNorm = 62.0608, GNorm = 1.5653, lr_0 = 2.3454e-04
Loss = 3.5963e-01, PNorm = 62.0624, GNorm = 1.7978, lr_0 = 2.3437e-04
Loss = 3.5070e-01, PNorm = 62.0661, GNorm = 1.4843, lr_0 = 2.3421e-04
Loss = 3.7029e-01, PNorm = 62.0701, GNorm = 1.4950, lr_0 = 2.3405e-04
Loss = 3.4879e-01, PNorm = 62.0733, GNorm = 1.6567, lr_0 = 2.3389e-04
Loss = 3.5172e-01, PNorm = 62.0735, GNorm = 1.4249, lr_0 = 2.3373e-04
Loss = 2.8326e-01, PNorm = 62.0742, GNorm = 1.1994, lr_0 = 2.3357e-04
Loss = 3.4532e-01, PNorm = 62.0778, GNorm = 1.6109, lr_0 = 2.3341e-04
Loss = 3.5901e-01, PNorm = 62.0789, GNorm = 1.6681, lr_0 = 2.3325e-04
Loss = 3.8328e-01, PNorm = 62.0810, GNorm = 1.9138, lr_0 = 2.3309e-04
Loss = 3.5342e-01, PNorm = 62.0827, GNorm = 2.1974, lr_0 = 2.3293e-04
Loss = 3.3810e-01, PNorm = 62.0867, GNorm = 1.3792, lr_0 = 2.3277e-04
Loss = 2.8307e-01, PNorm = 62.0870, GNorm = 1.2754, lr_0 = 2.3261e-04
Loss = 3.3955e-01, PNorm = 62.0900, GNorm = 1.3333, lr_0 = 2.3246e-04
Loss = 3.8810e-01, PNorm = 62.0920, GNorm = 1.6761, lr_0 = 2.3230e-04
Loss = 4.8210e-01, PNorm = 62.0968, GNorm = 1.7831, lr_0 = 2.3214e-04
Loss = 3.7570e-01, PNorm = 62.1004, GNorm = 1.8795, lr_0 = 2.3198e-04
Loss = 3.8664e-01, PNorm = 62.1017, GNorm = 1.8384, lr_0 = 2.3182e-04
Loss = 3.9175e-01, PNorm = 62.1051, GNorm = 1.2728, lr_0 = 2.3166e-04
Loss = 4.1713e-01, PNorm = 62.1081, GNorm = 1.4444, lr_0 = 2.3150e-04
Loss = 3.8089e-01, PNorm = 62.1124, GNorm = 1.5080, lr_0 = 2.3134e-04
Loss = 3.7119e-01, PNorm = 62.1124, GNorm = 1.3936, lr_0 = 2.3118e-04
Loss = 4.4199e-01, PNorm = 62.1164, GNorm = 2.6201, lr_0 = 2.3103e-04
Loss = 3.7102e-01, PNorm = 62.1219, GNorm = 1.2035, lr_0 = 2.3087e-04
Loss = 3.8263e-01, PNorm = 62.1225, GNorm = 1.9940, lr_0 = 2.3071e-04
Loss = 4.0286e-01, PNorm = 62.1241, GNorm = 1.7103, lr_0 = 2.3055e-04
Loss = 3.7400e-01, PNorm = 62.1237, GNorm = 1.6187, lr_0 = 2.3039e-04
Loss = 3.5365e-01, PNorm = 62.1236, GNorm = 1.3847, lr_0 = 2.3024e-04
Loss = 4.1568e-01, PNorm = 62.1244, GNorm = 1.1171, lr_0 = 2.3008e-04
Loss = 3.3158e-01, PNorm = 62.1275, GNorm = 1.3097, lr_0 = 2.2992e-04
Loss = 3.4196e-01, PNorm = 62.1321, GNorm = 1.2893, lr_0 = 2.2976e-04
Loss = 3.4682e-01, PNorm = 62.1366, GNorm = 1.5486, lr_0 = 2.2961e-04
Loss = 4.1448e-01, PNorm = 62.1356, GNorm = 1.6076, lr_0 = 2.2945e-04
Loss = 3.9875e-01, PNorm = 62.1379, GNorm = 1.5915, lr_0 = 2.2929e-04
Loss = 3.9037e-01, PNorm = 62.1420, GNorm = 1.4064, lr_0 = 2.2913e-04
Loss = 3.4381e-01, PNorm = 62.1431, GNorm = 1.1300, lr_0 = 2.2898e-04
Loss = 3.2596e-01, PNorm = 62.1454, GNorm = 2.1088, lr_0 = 2.2882e-04
Loss = 4.0560e-01, PNorm = 62.1468, GNorm = 1.7334, lr_0 = 2.2866e-04
Loss = 3.2870e-01, PNorm = 62.1485, GNorm = 1.2155, lr_0 = 2.2851e-04
Loss = 3.8083e-01, PNorm = 62.1531, GNorm = 1.5917, lr_0 = 2.2835e-04
Loss = 3.7737e-01, PNorm = 62.1548, GNorm = 1.2387, lr_0 = 2.2819e-04
Loss = 3.5095e-01, PNorm = 62.1553, GNorm = 1.5395, lr_0 = 2.2804e-04
Loss = 3.4778e-01, PNorm = 62.1554, GNorm = 2.0068, lr_0 = 2.2788e-04
Loss = 3.8370e-01, PNorm = 62.1566, GNorm = 1.6725, lr_0 = 2.2773e-04
Loss = 3.8759e-01, PNorm = 62.1569, GNorm = 1.5456, lr_0 = 2.2757e-04
Validation mae = 0.113060
Epoch 20
Loss = 3.4273e-01, PNorm = 62.1579, GNorm = 1.3199, lr_0 = 2.2741e-04
Loss = 3.5607e-01, PNorm = 62.1621, GNorm = 1.3035, lr_0 = 2.2726e-04
Loss = 3.6757e-01, PNorm = 62.1661, GNorm = 1.1093, lr_0 = 2.2710e-04
Loss = 3.7302e-01, PNorm = 62.1690, GNorm = 1.5760, lr_0 = 2.2695e-04
Loss = 3.8337e-01, PNorm = 62.1681, GNorm = 1.7320, lr_0 = 2.2679e-04
Loss = 3.3577e-01, PNorm = 62.1731, GNorm = 1.5628, lr_0 = 2.2664e-04
Loss = 3.8161e-01, PNorm = 62.1769, GNorm = 1.6430, lr_0 = 2.2648e-04
Loss = 3.3867e-01, PNorm = 62.1823, GNorm = 1.4731, lr_0 = 2.2632e-04
Loss = 4.2494e-01, PNorm = 62.1835, GNorm = 1.6089, lr_0 = 2.2617e-04
Loss = 4.1420e-01, PNorm = 62.1867, GNorm = 3.0063, lr_0 = 2.2601e-04
Loss = 3.2571e-01, PNorm = 62.1921, GNorm = 1.2449, lr_0 = 2.2586e-04
Loss = 3.8159e-01, PNorm = 62.1917, GNorm = 2.6580, lr_0 = 2.2571e-04
Loss = 3.1172e-01, PNorm = 62.1978, GNorm = 1.3779, lr_0 = 2.2555e-04
Loss = 3.6021e-01, PNorm = 62.2017, GNorm = 1.2746, lr_0 = 2.2540e-04
Loss = 4.2136e-01, PNorm = 62.2070, GNorm = 1.3423, lr_0 = 2.2524e-04
Loss = 3.1561e-01, PNorm = 62.2082, GNorm = 1.1988, lr_0 = 2.2509e-04
Loss = 3.1482e-01, PNorm = 62.2084, GNorm = 1.3775, lr_0 = 2.2493e-04
Loss = 3.6910e-01, PNorm = 62.2098, GNorm = 1.9044, lr_0 = 2.2478e-04
Loss = 3.6678e-01, PNorm = 62.2082, GNorm = 1.5716, lr_0 = 2.2463e-04
Loss = 3.7139e-01, PNorm = 62.2107, GNorm = 1.4502, lr_0 = 2.2447e-04
Loss = 3.4267e-01, PNorm = 62.2141, GNorm = 1.6965, lr_0 = 2.2432e-04
Loss = 3.5897e-01, PNorm = 62.2166, GNorm = 1.5404, lr_0 = 2.2416e-04
Loss = 3.4315e-01, PNorm = 62.2195, GNorm = 1.2191, lr_0 = 2.2401e-04
Loss = 3.3189e-01, PNorm = 62.2202, GNorm = 1.3112, lr_0 = 2.2386e-04
Loss = 4.3660e-01, PNorm = 62.2199, GNorm = 1.6111, lr_0 = 2.2370e-04
Loss = 3.2711e-01, PNorm = 62.2193, GNorm = 1.8408, lr_0 = 2.2355e-04
Loss = 3.5106e-01, PNorm = 62.2217, GNorm = 1.2533, lr_0 = 2.2340e-04
Loss = 3.6208e-01, PNorm = 62.2280, GNorm = 1.9925, lr_0 = 2.2324e-04
Loss = 4.1263e-01, PNorm = 62.2318, GNorm = 1.5697, lr_0 = 2.2309e-04
Loss = 4.1172e-01, PNorm = 62.2349, GNorm = 1.8241, lr_0 = 2.2294e-04
Loss = 3.7224e-01, PNorm = 62.2367, GNorm = 1.4606, lr_0 = 2.2279e-04
Loss = 3.7299e-01, PNorm = 62.2406, GNorm = 1.4118, lr_0 = 2.2263e-04
Loss = 3.8192e-01, PNorm = 62.2399, GNorm = 1.2317, lr_0 = 2.2248e-04
Loss = 3.6437e-01, PNorm = 62.2415, GNorm = 1.7876, lr_0 = 2.2233e-04
Loss = 3.5720e-01, PNorm = 62.2427, GNorm = 1.4878, lr_0 = 2.2218e-04
Loss = 3.4236e-01, PNorm = 62.2467, GNorm = 1.1804, lr_0 = 2.2202e-04
Loss = 3.9264e-01, PNorm = 62.2501, GNorm = 1.1618, lr_0 = 2.2187e-04
Loss = 3.6372e-01, PNorm = 62.2510, GNorm = 1.7218, lr_0 = 2.2172e-04
Loss = 3.4925e-01, PNorm = 62.2522, GNorm = 1.4447, lr_0 = 2.2157e-04
Loss = 4.5715e-01, PNorm = 62.2520, GNorm = 1.5837, lr_0 = 2.2142e-04
Loss = 3.5547e-01, PNorm = 62.2502, GNorm = 1.5870, lr_0 = 2.2126e-04
Loss = 2.8906e-01, PNorm = 62.2542, GNorm = 1.3653, lr_0 = 2.2111e-04
Loss = 3.3991e-01, PNorm = 62.2579, GNorm = 1.1487, lr_0 = 2.2096e-04
Loss = 3.3597e-01, PNorm = 62.2581, GNorm = 1.3653, lr_0 = 2.2081e-04
Loss = 3.4604e-01, PNorm = 62.2623, GNorm = 1.2648, lr_0 = 2.2066e-04
Loss = 4.1020e-01, PNorm = 62.2656, GNorm = 1.2710, lr_0 = 2.2051e-04
Loss = 3.7725e-01, PNorm = 62.2693, GNorm = 1.4832, lr_0 = 2.2036e-04
Loss = 3.3882e-01, PNorm = 62.2732, GNorm = 1.1776, lr_0 = 2.2021e-04
Loss = 3.7732e-01, PNorm = 62.2763, GNorm = 1.5907, lr_0 = 2.2005e-04
Loss = 3.6239e-01, PNorm = 62.2794, GNorm = 1.6423, lr_0 = 2.1990e-04
Loss = 3.9410e-01, PNorm = 62.2834, GNorm = 1.2247, lr_0 = 2.1975e-04
Loss = 3.6437e-01, PNorm = 62.2850, GNorm = 1.2356, lr_0 = 2.1960e-04
Loss = 3.0036e-01, PNorm = 62.2851, GNorm = 1.0746, lr_0 = 2.1945e-04
Loss = 3.1444e-01, PNorm = 62.2864, GNorm = 1.1806, lr_0 = 2.1930e-04
Loss = 3.3366e-01, PNorm = 62.2874, GNorm = 1.3345, lr_0 = 2.1915e-04
Loss = 3.9126e-01, PNorm = 62.2874, GNorm = 1.5936, lr_0 = 2.1900e-04
Loss = 3.9779e-01, PNorm = 62.2920, GNorm = 1.4692, lr_0 = 2.1885e-04
Loss = 3.7205e-01, PNorm = 62.2965, GNorm = 1.4293, lr_0 = 2.1870e-04
Loss = 3.6252e-01, PNorm = 62.2945, GNorm = 1.2873, lr_0 = 2.1855e-04
Loss = 3.3497e-01, PNorm = 62.2983, GNorm = 1.1965, lr_0 = 2.1840e-04
Loss = 3.5183e-01, PNorm = 62.2985, GNorm = 1.4504, lr_0 = 2.1825e-04
Loss = 3.9416e-01, PNorm = 62.2972, GNorm = 1.0251, lr_0 = 2.1810e-04
Loss = 3.7696e-01, PNorm = 62.3006, GNorm = 1.9536, lr_0 = 2.1795e-04
Loss = 3.6996e-01, PNorm = 62.3037, GNorm = 1.5216, lr_0 = 2.1780e-04
Loss = 3.8728e-01, PNorm = 62.3044, GNorm = 1.1362, lr_0 = 2.1765e-04
Loss = 4.2178e-01, PNorm = 62.3034, GNorm = 1.4878, lr_0 = 2.1751e-04
Loss = 3.6638e-01, PNorm = 62.3057, GNorm = 1.3403, lr_0 = 2.1736e-04
Loss = 3.3890e-01, PNorm = 62.3073, GNorm = 1.3477, lr_0 = 2.1721e-04
Loss = 3.5995e-01, PNorm = 62.3119, GNorm = 1.1082, lr_0 = 2.1706e-04
Loss = 3.3662e-01, PNorm = 62.3157, GNorm = 1.4759, lr_0 = 2.1691e-04
Loss = 3.9507e-01, PNorm = 62.3212, GNorm = 1.5108, lr_0 = 2.1676e-04
Loss = 3.9263e-01, PNorm = 62.3253, GNorm = 1.2732, lr_0 = 2.1661e-04
Loss = 3.7281e-01, PNorm = 62.3238, GNorm = 1.4456, lr_0 = 2.1646e-04
Loss = 3.6068e-01, PNorm = 62.3271, GNorm = 1.3608, lr_0 = 2.1632e-04
Loss = 3.5627e-01, PNorm = 62.3295, GNorm = 1.2525, lr_0 = 2.1617e-04
Loss = 3.9816e-01, PNorm = 62.3293, GNorm = 1.1327, lr_0 = 2.1602e-04
Loss = 3.3761e-01, PNorm = 62.3302, GNorm = 1.2210, lr_0 = 2.1587e-04
Loss = 3.4548e-01, PNorm = 62.3302, GNorm = 0.8962, lr_0 = 2.1572e-04
Loss = 3.6009e-01, PNorm = 62.3317, GNorm = 1.0796, lr_0 = 2.1558e-04
Loss = 4.2313e-01, PNorm = 62.3322, GNorm = 1.2679, lr_0 = 2.1543e-04
Loss = 3.5839e-01, PNorm = 62.3323, GNorm = 1.7131, lr_0 = 2.1528e-04
Loss = 3.9186e-01, PNorm = 62.3357, GNorm = 1.2264, lr_0 = 2.1513e-04
Loss = 3.6914e-01, PNorm = 62.3412, GNorm = 1.8583, lr_0 = 2.1499e-04
Loss = 3.9070e-01, PNorm = 62.3431, GNorm = 2.2530, lr_0 = 2.1484e-04
Loss = 3.4839e-01, PNorm = 62.3475, GNorm = 1.6137, lr_0 = 2.1469e-04
Loss = 3.7499e-01, PNorm = 62.3504, GNorm = 1.3813, lr_0 = 2.1454e-04
Loss = 3.7388e-01, PNorm = 62.3547, GNorm = 0.9678, lr_0 = 2.1440e-04
Loss = 3.6708e-01, PNorm = 62.3585, GNorm = 2.5117, lr_0 = 2.1425e-04
Loss = 3.6601e-01, PNorm = 62.3617, GNorm = 1.6509, lr_0 = 2.1410e-04
Loss = 3.7234e-01, PNorm = 62.3638, GNorm = 1.0277, lr_0 = 2.1396e-04
Loss = 4.0018e-01, PNorm = 62.3641, GNorm = 1.6381, lr_0 = 2.1381e-04
Loss = 3.8440e-01, PNorm = 62.3654, GNorm = 1.5257, lr_0 = 2.1366e-04
Loss = 3.4715e-01, PNorm = 62.3654, GNorm = 1.1596, lr_0 = 2.1352e-04
Loss = 3.3063e-01, PNorm = 62.3660, GNorm = 0.9962, lr_0 = 2.1337e-04
Loss = 3.5685e-01, PNorm = 62.3669, GNorm = 1.4156, lr_0 = 2.1323e-04
Loss = 3.5400e-01, PNorm = 62.3717, GNorm = 1.2719, lr_0 = 2.1308e-04
Loss = 3.9897e-01, PNorm = 62.3726, GNorm = 1.4522, lr_0 = 2.1293e-04
Loss = 3.3104e-01, PNorm = 62.3765, GNorm = 1.1653, lr_0 = 2.1279e-04
Loss = 3.2821e-01, PNorm = 62.3798, GNorm = 1.0176, lr_0 = 2.1264e-04
Loss = 3.5282e-01, PNorm = 62.3810, GNorm = 1.1828, lr_0 = 2.1250e-04
Loss = 3.4505e-01, PNorm = 62.3832, GNorm = 1.3909, lr_0 = 2.1235e-04
Loss = 4.0212e-01, PNorm = 62.3861, GNorm = 1.5548, lr_0 = 2.1221e-04
Loss = 4.1201e-01, PNorm = 62.3908, GNorm = 1.4294, lr_0 = 2.1206e-04
Loss = 3.3862e-01, PNorm = 62.3949, GNorm = 1.3767, lr_0 = 2.1191e-04
Loss = 3.3043e-01, PNorm = 62.3966, GNorm = 1.4095, lr_0 = 2.1177e-04
Loss = 3.6937e-01, PNorm = 62.3980, GNorm = 1.3418, lr_0 = 2.1162e-04
Loss = 3.5979e-01, PNorm = 62.3993, GNorm = 1.4294, lr_0 = 2.1148e-04
Loss = 3.8096e-01, PNorm = 62.4012, GNorm = 1.5120, lr_0 = 2.1133e-04
Loss = 3.2948e-01, PNorm = 62.4025, GNorm = 1.4736, lr_0 = 2.1119e-04
Loss = 3.8329e-01, PNorm = 62.4033, GNorm = 1.2493, lr_0 = 2.1104e-04
Loss = 4.2373e-01, PNorm = 62.4057, GNorm = 1.7459, lr_0 = 2.1090e-04
Loss = 3.4961e-01, PNorm = 62.4065, GNorm = 1.5040, lr_0 = 2.1076e-04
Loss = 3.5792e-01, PNorm = 62.4088, GNorm = 1.9515, lr_0 = 2.1061e-04
Loss = 3.4824e-01, PNorm = 62.4107, GNorm = 1.2219, lr_0 = 2.1047e-04
Loss = 4.4844e-01, PNorm = 62.4126, GNorm = 1.9925, lr_0 = 2.1032e-04
Loss = 3.4512e-01, PNorm = 62.4143, GNorm = 1.3815, lr_0 = 2.1018e-04
Loss = 3.7169e-01, PNorm = 62.4176, GNorm = 1.4044, lr_0 = 2.1003e-04
Loss = 3.4698e-01, PNorm = 62.4220, GNorm = 1.9226, lr_0 = 2.0989e-04
Loss = 3.2234e-01, PNorm = 62.4227, GNorm = 1.5903, lr_0 = 2.0975e-04
Loss = 3.9091e-01, PNorm = 62.4258, GNorm = 2.7788, lr_0 = 2.0960e-04
Validation mae = 0.111446
Epoch 21
Loss = 3.8249e-01, PNorm = 62.4253, GNorm = 1.1022, lr_0 = 2.0946e-04
Loss = 3.3670e-01, PNorm = 62.4298, GNorm = 1.4448, lr_0 = 2.0932e-04
Loss = 3.5568e-01, PNorm = 62.4334, GNorm = 1.3649, lr_0 = 2.0917e-04
Loss = 3.4399e-01, PNorm = 62.4339, GNorm = 1.5928, lr_0 = 2.0903e-04
Loss = 3.5077e-01, PNorm = 62.4375, GNorm = 1.3517, lr_0 = 2.0889e-04
Loss = 3.6301e-01, PNorm = 62.4418, GNorm = 1.6616, lr_0 = 2.0874e-04
Loss = 3.5908e-01, PNorm = 62.4473, GNorm = 1.5392, lr_0 = 2.0860e-04
Loss = 3.2522e-01, PNorm = 62.4496, GNorm = 1.4533, lr_0 = 2.0846e-04
Loss = 2.6713e-01, PNorm = 62.4491, GNorm = 1.7230, lr_0 = 2.0831e-04
Loss = 3.2754e-01, PNorm = 62.4520, GNorm = 1.0699, lr_0 = 2.0817e-04
Loss = 3.4966e-01, PNorm = 62.4569, GNorm = 1.2771, lr_0 = 2.0803e-04
Loss = 3.5889e-01, PNorm = 62.4608, GNorm = 1.3958, lr_0 = 2.0789e-04
Loss = 3.6140e-01, PNorm = 62.4619, GNorm = 1.8971, lr_0 = 2.0774e-04
Loss = 3.4247e-01, PNorm = 62.4636, GNorm = 1.0784, lr_0 = 2.0760e-04
Loss = 3.5222e-01, PNorm = 62.4636, GNorm = 1.8797, lr_0 = 2.0746e-04
Loss = 4.1434e-01, PNorm = 62.4671, GNorm = 1.6305, lr_0 = 2.0732e-04
Loss = 3.8795e-01, PNorm = 62.4692, GNorm = 2.1043, lr_0 = 2.0718e-04
Loss = 3.4502e-01, PNorm = 62.4730, GNorm = 1.3918, lr_0 = 2.0703e-04
Loss = 3.4661e-01, PNorm = 62.4786, GNorm = 1.2075, lr_0 = 2.0689e-04
Loss = 3.7508e-01, PNorm = 62.4814, GNorm = 1.8082, lr_0 = 2.0675e-04
Loss = 3.5936e-01, PNorm = 62.4819, GNorm = 1.0720, lr_0 = 2.0661e-04
Loss = 3.8265e-01, PNorm = 62.4842, GNorm = 1.1170, lr_0 = 2.0647e-04
Loss = 3.8947e-01, PNorm = 62.4863, GNorm = 1.5125, lr_0 = 2.0633e-04
Loss = 3.5614e-01, PNorm = 62.4891, GNorm = 1.2770, lr_0 = 2.0618e-04
Loss = 2.9980e-01, PNorm = 62.4920, GNorm = 1.4719, lr_0 = 2.0604e-04
Loss = 3.7382e-01, PNorm = 62.4946, GNorm = 2.1092, lr_0 = 2.0590e-04
Loss = 3.2592e-01, PNorm = 62.4945, GNorm = 1.5047, lr_0 = 2.0576e-04
Loss = 3.8006e-01, PNorm = 62.4959, GNorm = 1.4257, lr_0 = 2.0562e-04
Loss = 3.7863e-01, PNorm = 62.4973, GNorm = 1.3120, lr_0 = 2.0548e-04
Loss = 3.8267e-01, PNorm = 62.4984, GNorm = 1.3504, lr_0 = 2.0534e-04
Loss = 3.4700e-01, PNorm = 62.5031, GNorm = 1.1689, lr_0 = 2.0520e-04
Loss = 3.3814e-01, PNorm = 62.5066, GNorm = 1.2224, lr_0 = 2.0506e-04
Loss = 3.5041e-01, PNorm = 62.5109, GNorm = 1.1768, lr_0 = 2.0492e-04
Loss = 3.9238e-01, PNorm = 62.5134, GNorm = 1.3261, lr_0 = 2.0478e-04
Loss = 3.5918e-01, PNorm = 62.5159, GNorm = 1.6445, lr_0 = 2.0464e-04
Loss = 3.5594e-01, PNorm = 62.5149, GNorm = 1.1010, lr_0 = 2.0450e-04
Loss = 3.7108e-01, PNorm = 62.5170, GNorm = 1.3442, lr_0 = 2.0436e-04
Loss = 3.5904e-01, PNorm = 62.5200, GNorm = 1.7910, lr_0 = 2.0422e-04
Loss = 3.3472e-01, PNorm = 62.5220, GNorm = 1.2595, lr_0 = 2.0408e-04
Loss = 3.6615e-01, PNorm = 62.5233, GNorm = 1.5011, lr_0 = 2.0394e-04
Loss = 3.5351e-01, PNorm = 62.5271, GNorm = 1.3420, lr_0 = 2.0380e-04
Loss = 4.0672e-01, PNorm = 62.5309, GNorm = 1.1650, lr_0 = 2.0366e-04
Loss = 3.6268e-01, PNorm = 62.5301, GNorm = 1.1207, lr_0 = 2.0352e-04
Loss = 4.0144e-01, PNorm = 62.5336, GNorm = 1.5162, lr_0 = 2.0338e-04
Loss = 3.5686e-01, PNorm = 62.5359, GNorm = 1.3523, lr_0 = 2.0324e-04
Loss = 3.6807e-01, PNorm = 62.5382, GNorm = 1.2907, lr_0 = 2.0310e-04
Loss = 3.8142e-01, PNorm = 62.5402, GNorm = 1.8988, lr_0 = 2.0296e-04
Loss = 3.8623e-01, PNorm = 62.5420, GNorm = 0.8829, lr_0 = 2.0282e-04
Loss = 3.7978e-01, PNorm = 62.5428, GNorm = 1.3050, lr_0 = 2.0268e-04
Loss = 3.9332e-01, PNorm = 62.5448, GNorm = 0.9876, lr_0 = 2.0254e-04
Loss = 3.1801e-01, PNorm = 62.5470, GNorm = 1.6657, lr_0 = 2.0240e-04
Loss = 3.5063e-01, PNorm = 62.5510, GNorm = 1.1277, lr_0 = 2.0227e-04
Loss = 2.9932e-01, PNorm = 62.5529, GNorm = 1.7276, lr_0 = 2.0213e-04
Loss = 3.4763e-01, PNorm = 62.5546, GNorm = 0.9963, lr_0 = 2.0199e-04
Loss = 3.8994e-01, PNorm = 62.5588, GNorm = 1.0301, lr_0 = 2.0185e-04
Loss = 3.5660e-01, PNorm = 62.5647, GNorm = 1.7657, lr_0 = 2.0171e-04
Loss = 4.3868e-01, PNorm = 62.5628, GNorm = 1.7405, lr_0 = 2.0157e-04
Loss = 3.4838e-01, PNorm = 62.5604, GNorm = 1.1162, lr_0 = 2.0144e-04
Loss = 3.4615e-01, PNorm = 62.5610, GNorm = 1.3243, lr_0 = 2.0130e-04
Loss = 3.8868e-01, PNorm = 62.5641, GNorm = 1.2400, lr_0 = 2.0116e-04
Loss = 3.4524e-01, PNorm = 62.5686, GNorm = 1.6931, lr_0 = 2.0102e-04
Loss = 3.5750e-01, PNorm = 62.5728, GNorm = 1.2096, lr_0 = 2.0088e-04
Loss = 3.5879e-01, PNorm = 62.5746, GNorm = 1.3480, lr_0 = 2.0075e-04
Loss = 3.1982e-01, PNorm = 62.5782, GNorm = 1.6465, lr_0 = 2.0061e-04
Loss = 3.7406e-01, PNorm = 62.5798, GNorm = 1.8325, lr_0 = 2.0047e-04
Loss = 4.1741e-01, PNorm = 62.5763, GNorm = 1.4047, lr_0 = 2.0033e-04
Loss = 3.1900e-01, PNorm = 62.5803, GNorm = 0.9166, lr_0 = 2.0020e-04
Loss = 3.4682e-01, PNorm = 62.5845, GNorm = 1.5137, lr_0 = 2.0006e-04
Loss = 3.3898e-01, PNorm = 62.5862, GNorm = 1.5324, lr_0 = 1.9992e-04
Loss = 3.6448e-01, PNorm = 62.5885, GNorm = 1.2587, lr_0 = 1.9979e-04
Loss = 3.2334e-01, PNorm = 62.5892, GNorm = 0.9505, lr_0 = 1.9965e-04
Loss = 3.2545e-01, PNorm = 62.5869, GNorm = 1.3917, lr_0 = 1.9951e-04
Loss = 3.7758e-01, PNorm = 62.5868, GNorm = 1.4474, lr_0 = 1.9938e-04
Loss = 3.3802e-01, PNorm = 62.5881, GNorm = 1.3374, lr_0 = 1.9924e-04
Loss = 3.9152e-01, PNorm = 62.5915, GNorm = 1.2359, lr_0 = 1.9910e-04
Loss = 3.6273e-01, PNorm = 62.5934, GNorm = 1.2137, lr_0 = 1.9897e-04
Loss = 3.4730e-01, PNorm = 62.5978, GNorm = 1.5539, lr_0 = 1.9883e-04
Loss = 4.2282e-01, PNorm = 62.6011, GNorm = 1.8338, lr_0 = 1.9869e-04
Loss = 3.8332e-01, PNorm = 62.6032, GNorm = 1.1210, lr_0 = 1.9856e-04
Loss = 3.7827e-01, PNorm = 62.6025, GNorm = 1.4376, lr_0 = 1.9842e-04
Loss = 3.4027e-01, PNorm = 62.6026, GNorm = 1.4670, lr_0 = 1.9829e-04
Loss = 3.5483e-01, PNorm = 62.6032, GNorm = 1.3215, lr_0 = 1.9815e-04
Loss = 3.7486e-01, PNorm = 62.6045, GNorm = 2.0151, lr_0 = 1.9801e-04
Loss = 3.5457e-01, PNorm = 62.6080, GNorm = 1.2365, lr_0 = 1.9788e-04
Loss = 3.5820e-01, PNorm = 62.6102, GNorm = 1.2604, lr_0 = 1.9774e-04
Loss = 4.0622e-01, PNorm = 62.6122, GNorm = 1.6692, lr_0 = 1.9761e-04
Loss = 3.8809e-01, PNorm = 62.6145, GNorm = 1.2379, lr_0 = 1.9747e-04
Loss = 3.4399e-01, PNorm = 62.6126, GNorm = 1.2807, lr_0 = 1.9734e-04
Loss = 3.8386e-01, PNorm = 62.6180, GNorm = 1.7419, lr_0 = 1.9720e-04
Loss = 3.8932e-01, PNorm = 62.6203, GNorm = 1.7745, lr_0 = 1.9707e-04
Loss = 3.4936e-01, PNorm = 62.6217, GNorm = 1.5352, lr_0 = 1.9693e-04
Loss = 3.6706e-01, PNorm = 62.6233, GNorm = 2.0912, lr_0 = 1.9680e-04
Loss = 3.7164e-01, PNorm = 62.6250, GNorm = 1.8063, lr_0 = 1.9666e-04
Loss = 3.6738e-01, PNorm = 62.6304, GNorm = 1.8316, lr_0 = 1.9653e-04
Loss = 4.2243e-01, PNorm = 62.6305, GNorm = 1.1341, lr_0 = 1.9639e-04
Loss = 3.5853e-01, PNorm = 62.6322, GNorm = 1.3245, lr_0 = 1.9626e-04
Loss = 3.7171e-01, PNorm = 62.6363, GNorm = 1.6198, lr_0 = 1.9612e-04
Loss = 3.8224e-01, PNorm = 62.6408, GNorm = 1.3198, lr_0 = 1.9599e-04
Loss = 3.7719e-01, PNorm = 62.6406, GNorm = 1.8563, lr_0 = 1.9585e-04
Loss = 3.6435e-01, PNorm = 62.6407, GNorm = 1.5100, lr_0 = 1.9572e-04
Loss = 3.3824e-01, PNorm = 62.6435, GNorm = 1.1935, lr_0 = 1.9559e-04
Loss = 3.2482e-01, PNorm = 62.6424, GNorm = 1.2462, lr_0 = 1.9545e-04
Loss = 2.9751e-01, PNorm = 62.6430, GNorm = 1.1398, lr_0 = 1.9532e-04
Loss = 4.0691e-01, PNorm = 62.6473, GNorm = 1.1528, lr_0 = 1.9518e-04
Loss = 3.5982e-01, PNorm = 62.6510, GNorm = 1.4033, lr_0 = 1.9505e-04
Loss = 3.4635e-01, PNorm = 62.6506, GNorm = 1.3307, lr_0 = 1.9492e-04
Loss = 3.7297e-01, PNorm = 62.6527, GNorm = 1.0411, lr_0 = 1.9478e-04
Loss = 3.3570e-01, PNorm = 62.6576, GNorm = 1.2715, lr_0 = 1.9465e-04
Loss = 3.3078e-01, PNorm = 62.6596, GNorm = 1.9666, lr_0 = 1.9452e-04
Loss = 3.5328e-01, PNorm = 62.6615, GNorm = 1.8550, lr_0 = 1.9438e-04
Loss = 3.3064e-01, PNorm = 62.6656, GNorm = 1.2273, lr_0 = 1.9425e-04
Loss = 3.9474e-01, PNorm = 62.6683, GNorm = 1.9105, lr_0 = 1.9412e-04
Loss = 3.6284e-01, PNorm = 62.6665, GNorm = 1.5940, lr_0 = 1.9398e-04
Loss = 3.1805e-01, PNorm = 62.6686, GNorm = 1.3915, lr_0 = 1.9385e-04
Loss = 3.8397e-01, PNorm = 62.6690, GNorm = 1.2226, lr_0 = 1.9372e-04
Loss = 3.7606e-01, PNorm = 62.6729, GNorm = 0.9997, lr_0 = 1.9359e-04
Loss = 3.3348e-01, PNorm = 62.6743, GNorm = 1.5091, lr_0 = 1.9345e-04
Loss = 3.2514e-01, PNorm = 62.6767, GNorm = 1.4924, lr_0 = 1.9332e-04
Loss = 3.5720e-01, PNorm = 62.6798, GNorm = 1.1962, lr_0 = 1.9319e-04
Loss = 3.9206e-01, PNorm = 62.6812, GNorm = 1.4516, lr_0 = 1.9306e-04
Validation mae = 0.111654
Epoch 22
Loss = 4.3762e-01, PNorm = 62.6834, GNorm = 1.1254, lr_0 = 1.9292e-04
Loss = 3.5076e-01, PNorm = 62.6863, GNorm = 1.2208, lr_0 = 1.9279e-04
Loss = 3.9436e-01, PNorm = 62.6877, GNorm = 1.7647, lr_0 = 1.9266e-04
Loss = 3.7189e-01, PNorm = 62.6908, GNorm = 1.2039, lr_0 = 1.9253e-04
Loss = 3.3075e-01, PNorm = 62.6909, GNorm = 1.4856, lr_0 = 1.9240e-04
Loss = 3.6655e-01, PNorm = 62.6922, GNorm = 1.6361, lr_0 = 1.9226e-04
Loss = 3.8800e-01, PNorm = 62.6946, GNorm = 1.4516, lr_0 = 1.9213e-04
Loss = 3.4133e-01, PNorm = 62.6968, GNorm = 1.7179, lr_0 = 1.9200e-04
Loss = 3.7148e-01, PNorm = 62.6994, GNorm = 1.6978, lr_0 = 1.9187e-04
Loss = 3.3836e-01, PNorm = 62.7006, GNorm = 1.5756, lr_0 = 1.9174e-04
Loss = 3.8624e-01, PNorm = 62.7029, GNorm = 1.5908, lr_0 = 1.9161e-04
Loss = 3.8462e-01, PNorm = 62.7060, GNorm = 1.3972, lr_0 = 1.9148e-04
Loss = 3.8687e-01, PNorm = 62.7102, GNorm = 1.6037, lr_0 = 1.9134e-04
Loss = 3.0313e-01, PNorm = 62.7106, GNorm = 1.2805, lr_0 = 1.9121e-04
Loss = 3.5493e-01, PNorm = 62.7138, GNorm = 1.1470, lr_0 = 1.9108e-04
Loss = 3.8049e-01, PNorm = 62.7170, GNorm = 1.8459, lr_0 = 1.9095e-04
Loss = 3.3723e-01, PNorm = 62.7176, GNorm = 1.5040, lr_0 = 1.9082e-04
Loss = 4.1035e-01, PNorm = 62.7186, GNorm = 1.8391, lr_0 = 1.9069e-04
Loss = 3.8901e-01, PNorm = 62.7215, GNorm = 1.6269, lr_0 = 1.9056e-04
Loss = 3.3939e-01, PNorm = 62.7248, GNorm = 1.3558, lr_0 = 1.9043e-04
Loss = 3.6633e-01, PNorm = 62.7259, GNorm = 1.4046, lr_0 = 1.9030e-04
Loss = 3.9213e-01, PNorm = 62.7293, GNorm = 1.5599, lr_0 = 1.9017e-04
Loss = 3.3050e-01, PNorm = 62.7325, GNorm = 1.6637, lr_0 = 1.9004e-04
Loss = 3.0972e-01, PNorm = 62.7333, GNorm = 1.5438, lr_0 = 1.8991e-04
Loss = 3.1890e-01, PNorm = 62.7348, GNorm = 1.1670, lr_0 = 1.8978e-04
Loss = 3.8065e-01, PNorm = 62.7366, GNorm = 1.2727, lr_0 = 1.8965e-04
Loss = 4.0991e-01, PNorm = 62.7396, GNorm = 1.3897, lr_0 = 1.8952e-04
Loss = 3.3159e-01, PNorm = 62.7425, GNorm = 1.4742, lr_0 = 1.8939e-04
Loss = 3.5753e-01, PNorm = 62.7441, GNorm = 0.9451, lr_0 = 1.8926e-04
Loss = 3.1715e-01, PNorm = 62.7452, GNorm = 1.3976, lr_0 = 1.8913e-04
Loss = 3.2292e-01, PNorm = 62.7473, GNorm = 1.3270, lr_0 = 1.8900e-04
Loss = 3.1498e-01, PNorm = 62.7472, GNorm = 1.6099, lr_0 = 1.8887e-04
Loss = 3.2814e-01, PNorm = 62.7453, GNorm = 1.3958, lr_0 = 1.8874e-04
Loss = 3.5686e-01, PNorm = 62.7462, GNorm = 1.2449, lr_0 = 1.8861e-04
Loss = 3.6749e-01, PNorm = 62.7490, GNorm = 2.1590, lr_0 = 1.8848e-04
Loss = 3.2050e-01, PNorm = 62.7499, GNorm = 1.3920, lr_0 = 1.8835e-04
Loss = 3.8810e-01, PNorm = 62.7496, GNorm = 1.9646, lr_0 = 1.8822e-04
Loss = 3.7362e-01, PNorm = 62.7535, GNorm = 1.4191, lr_0 = 1.8809e-04
Loss = 4.0953e-01, PNorm = 62.7559, GNorm = 1.7487, lr_0 = 1.8797e-04
Loss = 3.5806e-01, PNorm = 62.7576, GNorm = 1.5089, lr_0 = 1.8784e-04
Loss = 3.6964e-01, PNorm = 62.7614, GNorm = 1.4586, lr_0 = 1.8771e-04
Loss = 3.7282e-01, PNorm = 62.7644, GNorm = 1.7558, lr_0 = 1.8758e-04
Loss = 3.4458e-01, PNorm = 62.7639, GNorm = 1.1278, lr_0 = 1.8745e-04
Loss = 3.4920e-01, PNorm = 62.7650, GNorm = 1.3786, lr_0 = 1.8732e-04
Loss = 3.7980e-01, PNorm = 62.7675, GNorm = 2.3976, lr_0 = 1.8719e-04
Loss = 3.7119e-01, PNorm = 62.7692, GNorm = 1.3486, lr_0 = 1.8707e-04
Loss = 3.6397e-01, PNorm = 62.7720, GNorm = 1.1107, lr_0 = 1.8694e-04
Loss = 3.8701e-01, PNorm = 62.7705, GNorm = 1.1375, lr_0 = 1.8681e-04
Loss = 3.4437e-01, PNorm = 62.7729, GNorm = 1.3430, lr_0 = 1.8668e-04
Loss = 3.7398e-01, PNorm = 62.7754, GNorm = 1.0163, lr_0 = 1.8655e-04
Loss = 3.1962e-01, PNorm = 62.7756, GNorm = 1.1657, lr_0 = 1.8643e-04
Loss = 3.4891e-01, PNorm = 62.7783, GNorm = 1.4533, lr_0 = 1.8630e-04
Loss = 3.4532e-01, PNorm = 62.7828, GNorm = 1.7421, lr_0 = 1.8617e-04
Loss = 3.3776e-01, PNorm = 62.7840, GNorm = 1.1842, lr_0 = 1.8604e-04
Loss = 3.8588e-01, PNorm = 62.7853, GNorm = 1.4641, lr_0 = 1.8592e-04
Loss = 3.8239e-01, PNorm = 62.7911, GNorm = 1.2954, lr_0 = 1.8579e-04
Loss = 4.0739e-01, PNorm = 62.7925, GNorm = 1.6828, lr_0 = 1.8566e-04
Loss = 3.1752e-01, PNorm = 62.7983, GNorm = 1.6336, lr_0 = 1.8553e-04
Loss = 3.6938e-01, PNorm = 62.8016, GNorm = 1.4429, lr_0 = 1.8541e-04
Loss = 4.0611e-01, PNorm = 62.8025, GNorm = 1.6423, lr_0 = 1.8528e-04
Loss = 3.2854e-01, PNorm = 62.8063, GNorm = 1.1074, lr_0 = 1.8515e-04
Loss = 3.3968e-01, PNorm = 62.8101, GNorm = 1.0018, lr_0 = 1.8503e-04
Loss = 3.5107e-01, PNorm = 62.8140, GNorm = 1.5311, lr_0 = 1.8490e-04
Loss = 2.9157e-01, PNorm = 62.8148, GNorm = 1.3569, lr_0 = 1.8477e-04
Loss = 3.3790e-01, PNorm = 62.8154, GNorm = 1.3449, lr_0 = 1.8465e-04
Loss = 3.6502e-01, PNorm = 62.8178, GNorm = 1.3478, lr_0 = 1.8452e-04
Loss = 3.5567e-01, PNorm = 62.8176, GNorm = 1.2648, lr_0 = 1.8439e-04
Loss = 3.4191e-01, PNorm = 62.8188, GNorm = 1.8333, lr_0 = 1.8427e-04
Loss = 3.3018e-01, PNorm = 62.8227, GNorm = 1.4964, lr_0 = 1.8414e-04
Loss = 3.4166e-01, PNorm = 62.8226, GNorm = 1.2263, lr_0 = 1.8401e-04
Loss = 3.2485e-01, PNorm = 62.8243, GNorm = 1.9973, lr_0 = 1.8389e-04
Loss = 4.1161e-01, PNorm = 62.8286, GNorm = 1.4908, lr_0 = 1.8376e-04
Loss = 3.4948e-01, PNorm = 62.8307, GNorm = 2.1218, lr_0 = 1.8364e-04
Loss = 3.8476e-01, PNorm = 62.8298, GNorm = 1.6183, lr_0 = 1.8351e-04
Loss = 4.0961e-01, PNorm = 62.8291, GNorm = 1.6607, lr_0 = 1.8338e-04
Loss = 3.1636e-01, PNorm = 62.8301, GNorm = 0.9414, lr_0 = 1.8326e-04
Loss = 3.6345e-01, PNorm = 62.8324, GNorm = 1.4639, lr_0 = 1.8313e-04
Loss = 4.4171e-01, PNorm = 62.8328, GNorm = 1.5302, lr_0 = 1.8301e-04
Loss = 3.7497e-01, PNorm = 62.8337, GNorm = 1.5782, lr_0 = 1.8288e-04
Loss = 3.3097e-01, PNorm = 62.8363, GNorm = 1.3895, lr_0 = 1.8276e-04
Loss = 3.5802e-01, PNorm = 62.8398, GNorm = 1.3917, lr_0 = 1.8263e-04
Loss = 3.6409e-01, PNorm = 62.8420, GNorm = 1.4961, lr_0 = 1.8251e-04
Loss = 3.3926e-01, PNorm = 62.8437, GNorm = 1.8561, lr_0 = 1.8238e-04
Loss = 3.6768e-01, PNorm = 62.8443, GNorm = 1.5262, lr_0 = 1.8226e-04
Loss = 3.4997e-01, PNorm = 62.8474, GNorm = 1.4402, lr_0 = 1.8213e-04
Loss = 3.8650e-01, PNorm = 62.8503, GNorm = 1.5258, lr_0 = 1.8201e-04
Loss = 3.4412e-01, PNorm = 62.8515, GNorm = 1.3467, lr_0 = 1.8188e-04
Loss = 3.5659e-01, PNorm = 62.8531, GNorm = 1.4183, lr_0 = 1.8176e-04
Loss = 3.5410e-01, PNorm = 62.8557, GNorm = 1.3411, lr_0 = 1.8163e-04
Loss = 3.0018e-01, PNorm = 62.8551, GNorm = 2.0597, lr_0 = 1.8151e-04
Loss = 3.4254e-01, PNorm = 62.8569, GNorm = 1.2893, lr_0 = 1.8138e-04
Loss = 3.4937e-01, PNorm = 62.8601, GNorm = 1.6937, lr_0 = 1.8126e-04
Loss = 3.3612e-01, PNorm = 62.8621, GNorm = 1.0743, lr_0 = 1.8114e-04
Loss = 3.4093e-01, PNorm = 62.8642, GNorm = 1.2126, lr_0 = 1.8101e-04
Loss = 3.2337e-01, PNorm = 62.8649, GNorm = 1.0957, lr_0 = 1.8089e-04
Loss = 3.8685e-01, PNorm = 62.8679, GNorm = 1.6991, lr_0 = 1.8076e-04
Loss = 3.6683e-01, PNorm = 62.8700, GNorm = 2.0956, lr_0 = 1.8064e-04
Loss = 3.7338e-01, PNorm = 62.8700, GNorm = 1.5831, lr_0 = 1.8052e-04
Loss = 2.6917e-01, PNorm = 62.8708, GNorm = 1.1604, lr_0 = 1.8039e-04
Loss = 3.4958e-01, PNorm = 62.8728, GNorm = 2.0215, lr_0 = 1.8027e-04
Loss = 3.9319e-01, PNorm = 62.8728, GNorm = 1.0858, lr_0 = 1.8015e-04
Loss = 3.6859e-01, PNorm = 62.8737, GNorm = 1.3242, lr_0 = 1.8002e-04
Loss = 3.2680e-01, PNorm = 62.8737, GNorm = 1.2723, lr_0 = 1.7990e-04
Loss = 3.2176e-01, PNorm = 62.8743, GNorm = 1.2388, lr_0 = 1.7978e-04
Loss = 3.5629e-01, PNorm = 62.8783, GNorm = 1.2425, lr_0 = 1.7965e-04
Loss = 3.0521e-01, PNorm = 62.8813, GNorm = 1.1831, lr_0 = 1.7953e-04
Loss = 3.4668e-01, PNorm = 62.8789, GNorm = 1.1536, lr_0 = 1.7941e-04
Loss = 3.7811e-01, PNorm = 62.8799, GNorm = 1.1962, lr_0 = 1.7928e-04
Loss = 3.9654e-01, PNorm = 62.8806, GNorm = 1.6280, lr_0 = 1.7916e-04
Loss = 3.5642e-01, PNorm = 62.8828, GNorm = 1.7645, lr_0 = 1.7904e-04
Loss = 3.5630e-01, PNorm = 62.8876, GNorm = 1.3809, lr_0 = 1.7892e-04
Loss = 3.6833e-01, PNorm = 62.8910, GNorm = 1.2562, lr_0 = 1.7879e-04
Loss = 3.3242e-01, PNorm = 62.8924, GNorm = 1.3696, lr_0 = 1.7867e-04
Loss = 3.5834e-01, PNorm = 62.8915, GNorm = 1.6255, lr_0 = 1.7855e-04
Loss = 3.9104e-01, PNorm = 62.8932, GNorm = 2.0880, lr_0 = 1.7843e-04
Loss = 3.9345e-01, PNorm = 62.8931, GNorm = 1.4515, lr_0 = 1.7830e-04
Loss = 3.8509e-01, PNorm = 62.8963, GNorm = 1.4817, lr_0 = 1.7818e-04
Loss = 3.4363e-01, PNorm = 62.8979, GNorm = 1.2924, lr_0 = 1.7806e-04
Loss = 3.4987e-01, PNorm = 62.8997, GNorm = 1.5339, lr_0 = 1.7794e-04
Loss = 4.0551e-01, PNorm = 62.9014, GNorm = 1.7546, lr_0 = 1.7782e-04
Validation mae = 0.112792
Epoch 23
Loss = 3.9288e-01, PNorm = 62.9058, GNorm = 1.1641, lr_0 = 1.7769e-04
Loss = 3.1963e-01, PNorm = 62.9092, GNorm = 1.2408, lr_0 = 1.7757e-04
Loss = 3.1341e-01, PNorm = 62.9107, GNorm = 1.2206, lr_0 = 1.7745e-04
Loss = 4.2902e-01, PNorm = 62.9133, GNorm = 1.3731, lr_0 = 1.7733e-04
Loss = 3.5435e-01, PNorm = 62.9164, GNorm = 2.2215, lr_0 = 1.7721e-04
Loss = 3.4474e-01, PNorm = 62.9178, GNorm = 1.4799, lr_0 = 1.7709e-04
Loss = 3.9839e-01, PNorm = 62.9205, GNorm = 1.9869, lr_0 = 1.7696e-04
Loss = 3.5510e-01, PNorm = 62.9249, GNorm = 1.1123, lr_0 = 1.7684e-04
Loss = 3.9288e-01, PNorm = 62.9258, GNorm = 2.1299, lr_0 = 1.7672e-04
Loss = 3.9094e-01, PNorm = 62.9279, GNorm = 1.7276, lr_0 = 1.7660e-04
Loss = 3.5184e-01, PNorm = 62.9299, GNorm = 1.8702, lr_0 = 1.7648e-04
Loss = 3.4567e-01, PNorm = 62.9317, GNorm = 1.0982, lr_0 = 1.7636e-04
Loss = 3.5656e-01, PNorm = 62.9332, GNorm = 1.0391, lr_0 = 1.7624e-04
Loss = 3.4202e-01, PNorm = 62.9351, GNorm = 1.0503, lr_0 = 1.7612e-04
Loss = 3.5407e-01, PNorm = 62.9362, GNorm = 1.2721, lr_0 = 1.7600e-04
Loss = 3.3102e-01, PNorm = 62.9386, GNorm = 1.8261, lr_0 = 1.7588e-04
Loss = 3.5994e-01, PNorm = 62.9405, GNorm = 1.3585, lr_0 = 1.7576e-04
Loss = 3.2567e-01, PNorm = 62.9421, GNorm = 1.3719, lr_0 = 1.7564e-04
Loss = 3.9944e-01, PNorm = 62.9452, GNorm = 1.3654, lr_0 = 1.7552e-04
Loss = 3.4886e-01, PNorm = 62.9480, GNorm = 1.3877, lr_0 = 1.7540e-04
Loss = 3.6649e-01, PNorm = 62.9474, GNorm = 1.6321, lr_0 = 1.7528e-04
Loss = 3.5921e-01, PNorm = 62.9490, GNorm = 1.4434, lr_0 = 1.7516e-04
Loss = 3.3724e-01, PNorm = 62.9493, GNorm = 1.5375, lr_0 = 1.7504e-04
Loss = 3.3962e-01, PNorm = 62.9473, GNorm = 1.6439, lr_0 = 1.7492e-04
Loss = 3.8503e-01, PNorm = 62.9492, GNorm = 1.2853, lr_0 = 1.7480e-04
Loss = 3.6248e-01, PNorm = 62.9518, GNorm = 1.7342, lr_0 = 1.7468e-04
Loss = 3.4503e-01, PNorm = 62.9546, GNorm = 1.1349, lr_0 = 1.7456e-04
Loss = 3.5097e-01, PNorm = 62.9560, GNorm = 1.1993, lr_0 = 1.7444e-04
Loss = 3.3019e-01, PNorm = 62.9577, GNorm = 1.3776, lr_0 = 1.7432e-04
Loss = 3.2008e-01, PNorm = 62.9569, GNorm = 1.4159, lr_0 = 1.7420e-04
Loss = 3.8421e-01, PNorm = 62.9580, GNorm = 1.2027, lr_0 = 1.7408e-04
Loss = 3.2193e-01, PNorm = 62.9601, GNorm = 1.5880, lr_0 = 1.7396e-04
Loss = 3.5535e-01, PNorm = 62.9619, GNorm = 2.1353, lr_0 = 1.7384e-04
Loss = 3.7823e-01, PNorm = 62.9639, GNorm = 1.2973, lr_0 = 1.7372e-04
Loss = 3.3178e-01, PNorm = 62.9655, GNorm = 1.1885, lr_0 = 1.7360e-04
Loss = 3.4298e-01, PNorm = 62.9675, GNorm = 0.9954, lr_0 = 1.7348e-04
Loss = 3.7252e-01, PNorm = 62.9679, GNorm = 1.5880, lr_0 = 1.7336e-04
Loss = 2.8901e-01, PNorm = 62.9671, GNorm = 1.3357, lr_0 = 1.7325e-04
Loss = 4.3890e-01, PNorm = 62.9668, GNorm = 1.3140, lr_0 = 1.7313e-04
Loss = 3.3265e-01, PNorm = 62.9698, GNorm = 1.0160, lr_0 = 1.7301e-04
Loss = 3.3326e-01, PNorm = 62.9732, GNorm = 1.7473, lr_0 = 1.7289e-04
Loss = 3.4936e-01, PNorm = 62.9725, GNorm = 1.3457, lr_0 = 1.7277e-04
Loss = 3.5660e-01, PNorm = 62.9736, GNorm = 1.8219, lr_0 = 1.7265e-04
Loss = 3.4988e-01, PNorm = 62.9759, GNorm = 1.5714, lr_0 = 1.7253e-04
Loss = 3.7362e-01, PNorm = 62.9757, GNorm = 1.3298, lr_0 = 1.7242e-04
Loss = 3.5118e-01, PNorm = 62.9748, GNorm = 1.5880, lr_0 = 1.7230e-04
Loss = 4.1580e-01, PNorm = 62.9779, GNorm = 1.8170, lr_0 = 1.7218e-04
Loss = 3.4242e-01, PNorm = 62.9790, GNorm = 1.1580, lr_0 = 1.7206e-04
Loss = 3.3145e-01, PNorm = 62.9820, GNorm = 1.8446, lr_0 = 1.7194e-04
Loss = 3.7718e-01, PNorm = 62.9856, GNorm = 1.3946, lr_0 = 1.7183e-04
Loss = 4.3132e-01, PNorm = 62.9879, GNorm = 1.4503, lr_0 = 1.7171e-04
Loss = 3.5008e-01, PNorm = 62.9900, GNorm = 1.5825, lr_0 = 1.7159e-04
Loss = 3.8107e-01, PNorm = 62.9897, GNorm = 2.6117, lr_0 = 1.7147e-04
Loss = 3.2605e-01, PNorm = 62.9922, GNorm = 1.3045, lr_0 = 1.7136e-04
Loss = 4.1172e-01, PNorm = 62.9961, GNorm = 1.2284, lr_0 = 1.7124e-04
Loss = 3.4919e-01, PNorm = 62.9999, GNorm = 0.9591, lr_0 = 1.7112e-04
Loss = 3.4117e-01, PNorm = 63.0013, GNorm = 1.3076, lr_0 = 1.7100e-04
Loss = 3.5582e-01, PNorm = 63.0032, GNorm = 1.5114, lr_0 = 1.7089e-04
Loss = 3.3297e-01, PNorm = 63.0065, GNorm = 1.3502, lr_0 = 1.7077e-04
Loss = 3.9201e-01, PNorm = 63.0093, GNorm = 1.3773, lr_0 = 1.7065e-04
Loss = 4.0538e-01, PNorm = 63.0113, GNorm = 1.3551, lr_0 = 1.7054e-04
Loss = 3.9648e-01, PNorm = 63.0123, GNorm = 1.3492, lr_0 = 1.7042e-04
Loss = 3.8383e-01, PNorm = 63.0139, GNorm = 1.4033, lr_0 = 1.7030e-04
Loss = 3.9122e-01, PNorm = 63.0173, GNorm = 1.2794, lr_0 = 1.7019e-04
Loss = 3.1683e-01, PNorm = 63.0199, GNorm = 1.3637, lr_0 = 1.7007e-04
Loss = 3.2527e-01, PNorm = 63.0225, GNorm = 1.1986, lr_0 = 1.6995e-04
Loss = 3.4098e-01, PNorm = 63.0243, GNorm = 1.7376, lr_0 = 1.6984e-04
Loss = 3.8126e-01, PNorm = 63.0263, GNorm = 1.1268, lr_0 = 1.6972e-04
Loss = 3.2949e-01, PNorm = 63.0256, GNorm = 1.0940, lr_0 = 1.6960e-04
Loss = 3.1258e-01, PNorm = 63.0276, GNorm = 1.6148, lr_0 = 1.6949e-04
Loss = 3.0531e-01, PNorm = 63.0293, GNorm = 1.7177, lr_0 = 1.6937e-04
Loss = 3.7331e-01, PNorm = 63.0302, GNorm = 1.3349, lr_0 = 1.6926e-04
Loss = 3.3115e-01, PNorm = 63.0297, GNorm = 1.6006, lr_0 = 1.6914e-04
Loss = 3.4034e-01, PNorm = 63.0322, GNorm = 1.3687, lr_0 = 1.6902e-04
Loss = 3.4149e-01, PNorm = 63.0349, GNorm = 1.5610, lr_0 = 1.6891e-04
Loss = 3.6092e-01, PNorm = 63.0353, GNorm = 1.5446, lr_0 = 1.6879e-04
Loss = 3.4452e-01, PNorm = 63.0376, GNorm = 1.9806, lr_0 = 1.6868e-04
Loss = 3.4090e-01, PNorm = 63.0383, GNorm = 1.3511, lr_0 = 1.6856e-04
Loss = 3.3743e-01, PNorm = 63.0393, GNorm = 2.0655, lr_0 = 1.6845e-04
Loss = 3.1408e-01, PNorm = 63.0387, GNorm = 1.3855, lr_0 = 1.6833e-04
Loss = 3.2726e-01, PNorm = 63.0404, GNorm = 1.2207, lr_0 = 1.6821e-04
Loss = 3.4669e-01, PNorm = 63.0440, GNorm = 1.3758, lr_0 = 1.6810e-04
Loss = 3.7455e-01, PNorm = 63.0440, GNorm = 1.4950, lr_0 = 1.6798e-04
Loss = 3.5548e-01, PNorm = 63.0444, GNorm = 2.1664, lr_0 = 1.6787e-04
Loss = 3.8986e-01, PNorm = 63.0454, GNorm = 1.4445, lr_0 = 1.6775e-04
Loss = 3.9608e-01, PNorm = 63.0461, GNorm = 1.1483, lr_0 = 1.6764e-04
Loss = 3.5566e-01, PNorm = 63.0474, GNorm = 1.4420, lr_0 = 1.6752e-04
Loss = 3.4924e-01, PNorm = 63.0491, GNorm = 1.1551, lr_0 = 1.6741e-04
Loss = 3.7785e-01, PNorm = 63.0495, GNorm = 1.5698, lr_0 = 1.6729e-04
Loss = 3.4566e-01, PNorm = 63.0520, GNorm = 1.5280, lr_0 = 1.6718e-04
Loss = 3.4050e-01, PNorm = 63.0567, GNorm = 1.9258, lr_0 = 1.6707e-04
Loss = 3.6005e-01, PNorm = 63.0595, GNorm = 1.6938, lr_0 = 1.6695e-04
Loss = 3.5258e-01, PNorm = 63.0608, GNorm = 1.5945, lr_0 = 1.6684e-04
Loss = 3.7850e-01, PNorm = 63.0628, GNorm = 1.3016, lr_0 = 1.6672e-04
Loss = 3.7045e-01, PNorm = 63.0624, GNorm = 1.6163, lr_0 = 1.6661e-04
Loss = 3.5071e-01, PNorm = 63.0631, GNorm = 1.3079, lr_0 = 1.6649e-04
Loss = 3.5471e-01, PNorm = 63.0624, GNorm = 1.5366, lr_0 = 1.6638e-04
Loss = 3.5108e-01, PNorm = 63.0638, GNorm = 2.0692, lr_0 = 1.6627e-04
Loss = 3.5924e-01, PNorm = 63.0651, GNorm = 2.6567, lr_0 = 1.6615e-04
Loss = 3.6377e-01, PNorm = 63.0653, GNorm = 1.3184, lr_0 = 1.6604e-04
Loss = 3.6742e-01, PNorm = 63.0672, GNorm = 1.6836, lr_0 = 1.6592e-04
Loss = 3.2718e-01, PNorm = 63.0686, GNorm = 1.1643, lr_0 = 1.6581e-04
Loss = 3.1564e-01, PNorm = 63.0709, GNorm = 1.0398, lr_0 = 1.6570e-04
Loss = 3.0663e-01, PNorm = 63.0725, GNorm = 1.5052, lr_0 = 1.6558e-04
Loss = 3.8659e-01, PNorm = 63.0746, GNorm = 2.0224, lr_0 = 1.6547e-04
Loss = 3.3299e-01, PNorm = 63.0764, GNorm = 1.4930, lr_0 = 1.6536e-04
Loss = 3.3088e-01, PNorm = 63.0795, GNorm = 1.2232, lr_0 = 1.6524e-04
Loss = 3.5029e-01, PNorm = 63.0792, GNorm = 1.4567, lr_0 = 1.6513e-04
Loss = 3.2167e-01, PNorm = 63.0792, GNorm = 1.0612, lr_0 = 1.6502e-04
Loss = 3.2762e-01, PNorm = 63.0816, GNorm = 1.2469, lr_0 = 1.6490e-04
Loss = 3.6065e-01, PNorm = 63.0831, GNorm = 1.1292, lr_0 = 1.6479e-04
Loss = 3.5579e-01, PNorm = 63.0833, GNorm = 1.4198, lr_0 = 1.6468e-04
Loss = 3.7734e-01, PNorm = 63.0851, GNorm = 2.3025, lr_0 = 1.6457e-04
Loss = 3.4732e-01, PNorm = 63.0858, GNorm = 1.2728, lr_0 = 1.6445e-04
Loss = 3.5364e-01, PNorm = 63.0875, GNorm = 2.1689, lr_0 = 1.6434e-04
Loss = 3.6409e-01, PNorm = 63.0894, GNorm = 1.1839, lr_0 = 1.6423e-04
Loss = 3.5992e-01, PNorm = 63.0919, GNorm = 1.1116, lr_0 = 1.6412e-04
Loss = 3.5612e-01, PNorm = 63.0939, GNorm = 1.1252, lr_0 = 1.6400e-04
Loss = 3.2674e-01, PNorm = 63.0977, GNorm = 1.3574, lr_0 = 1.6389e-04
Loss = 3.7651e-01, PNorm = 63.1001, GNorm = 1.4564, lr_0 = 1.6378e-04
Validation mae = 0.111548
Epoch 24
Loss = 3.2633e-01, PNorm = 63.1028, GNorm = 1.9250, lr_0 = 1.6367e-04
Loss = 3.4486e-01, PNorm = 63.1066, GNorm = 1.0762, lr_0 = 1.6355e-04
Loss = 3.7245e-01, PNorm = 63.1093, GNorm = 1.3859, lr_0 = 1.6344e-04
Loss = 3.8222e-01, PNorm = 63.1125, GNorm = 1.9260, lr_0 = 1.6333e-04
Loss = 3.2782e-01, PNorm = 63.1133, GNorm = 1.4350, lr_0 = 1.6322e-04
Loss = 3.2417e-01, PNorm = 63.1142, GNorm = 0.9755, lr_0 = 1.6311e-04
Loss = 3.6775e-01, PNorm = 63.1168, GNorm = 1.4073, lr_0 = 1.6299e-04
Loss = 3.6366e-01, PNorm = 63.1175, GNorm = 1.1654, lr_0 = 1.6288e-04
Loss = 3.4380e-01, PNorm = 63.1196, GNorm = 1.1340, lr_0 = 1.6277e-04
Loss = 3.4383e-01, PNorm = 63.1218, GNorm = 1.1666, lr_0 = 1.6266e-04
Loss = 3.9276e-01, PNorm = 63.1223, GNorm = 1.3702, lr_0 = 1.6255e-04
Loss = 3.1056e-01, PNorm = 63.1236, GNorm = 1.3885, lr_0 = 1.6244e-04
Loss = 3.5307e-01, PNorm = 63.1245, GNorm = 1.5312, lr_0 = 1.6233e-04
Loss = 3.5656e-01, PNorm = 63.1273, GNorm = 2.0169, lr_0 = 1.6221e-04
Loss = 3.4441e-01, PNorm = 63.1321, GNorm = 1.2244, lr_0 = 1.6210e-04
Loss = 3.3861e-01, PNorm = 63.1348, GNorm = 1.1102, lr_0 = 1.6199e-04
Loss = 3.7563e-01, PNorm = 63.1370, GNorm = 1.2859, lr_0 = 1.6188e-04
Loss = 4.1210e-01, PNorm = 63.1390, GNorm = 2.5499, lr_0 = 1.6177e-04
Loss = 3.6626e-01, PNorm = 63.1427, GNorm = 1.3571, lr_0 = 1.6166e-04
Loss = 4.0896e-01, PNorm = 63.1432, GNorm = 1.7254, lr_0 = 1.6155e-04
Loss = 3.1196e-01, PNorm = 63.1433, GNorm = 1.6404, lr_0 = 1.6144e-04
Loss = 3.4741e-01, PNorm = 63.1472, GNorm = 1.4101, lr_0 = 1.6133e-04
Loss = 3.3068e-01, PNorm = 63.1498, GNorm = 1.1508, lr_0 = 1.6122e-04
Loss = 4.2070e-01, PNorm = 63.1495, GNorm = 2.7600, lr_0 = 1.6111e-04
Loss = 3.5164e-01, PNorm = 63.1522, GNorm = 1.6984, lr_0 = 1.6100e-04
Loss = 3.4984e-01, PNorm = 63.1534, GNorm = 1.2489, lr_0 = 1.6089e-04
Loss = 3.4499e-01, PNorm = 63.1546, GNorm = 1.2131, lr_0 = 1.6078e-04
Loss = 3.3249e-01, PNorm = 63.1529, GNorm = 1.4894, lr_0 = 1.6067e-04
Loss = 3.1967e-01, PNorm = 63.1537, GNorm = 1.1902, lr_0 = 1.6056e-04
Loss = 3.4836e-01, PNorm = 63.1572, GNorm = 1.5703, lr_0 = 1.6045e-04
Loss = 3.1268e-01, PNorm = 63.1600, GNorm = 1.5373, lr_0 = 1.6034e-04
Loss = 3.3093e-01, PNorm = 63.1615, GNorm = 1.4225, lr_0 = 1.6023e-04
Loss = 3.2932e-01, PNorm = 63.1614, GNorm = 1.4897, lr_0 = 1.6012e-04
Loss = 3.1718e-01, PNorm = 63.1632, GNorm = 1.2345, lr_0 = 1.6001e-04
Loss = 4.3722e-01, PNorm = 63.1651, GNorm = 1.5879, lr_0 = 1.5990e-04
Loss = 3.6020e-01, PNorm = 63.1658, GNorm = 1.2375, lr_0 = 1.5979e-04
Loss = 3.0253e-01, PNorm = 63.1679, GNorm = 1.3571, lr_0 = 1.5968e-04
Loss = 4.2099e-01, PNorm = 63.1675, GNorm = 1.5056, lr_0 = 1.5957e-04
Loss = 3.2702e-01, PNorm = 63.1682, GNorm = 1.9250, lr_0 = 1.5946e-04
Loss = 2.8757e-01, PNorm = 63.1711, GNorm = 1.9233, lr_0 = 1.5935e-04
Loss = 3.4930e-01, PNorm = 63.1711, GNorm = 1.2844, lr_0 = 1.5924e-04
Loss = 3.5236e-01, PNorm = 63.1707, GNorm = 1.3247, lr_0 = 1.5913e-04
Loss = 3.5155e-01, PNorm = 63.1728, GNorm = 1.2443, lr_0 = 1.5902e-04
Loss = 3.3244e-01, PNorm = 63.1747, GNorm = 1.3873, lr_0 = 1.5891e-04
Loss = 3.7217e-01, PNorm = 63.1760, GNorm = 1.4642, lr_0 = 1.5880e-04
Loss = 3.2159e-01, PNorm = 63.1790, GNorm = 1.2478, lr_0 = 1.5870e-04
Loss = 3.3422e-01, PNorm = 63.1802, GNorm = 1.4463, lr_0 = 1.5859e-04
Loss = 3.3647e-01, PNorm = 63.1794, GNorm = 1.6816, lr_0 = 1.5848e-04
Loss = 3.6854e-01, PNorm = 63.1816, GNorm = 1.5419, lr_0 = 1.5837e-04
Loss = 3.7380e-01, PNorm = 63.1863, GNorm = 1.6316, lr_0 = 1.5826e-04
Loss = 4.0952e-01, PNorm = 63.1863, GNorm = 1.4288, lr_0 = 1.5815e-04
Loss = 3.3551e-01, PNorm = 63.1876, GNorm = 1.1579, lr_0 = 1.5804e-04
Loss = 3.5842e-01, PNorm = 63.1895, GNorm = 1.4815, lr_0 = 1.5794e-04
Loss = 4.3926e-01, PNorm = 63.1910, GNorm = 1.7549, lr_0 = 1.5783e-04
Loss = 3.7204e-01, PNorm = 63.1948, GNorm = 1.5918, lr_0 = 1.5772e-04
Loss = 3.9694e-01, PNorm = 63.1964, GNorm = 1.7858, lr_0 = 1.5761e-04
Loss = 3.7449e-01, PNorm = 63.1973, GNorm = 1.3045, lr_0 = 1.5750e-04
Loss = 3.8183e-01, PNorm = 63.1972, GNorm = 1.7117, lr_0 = 1.5740e-04
Loss = 3.5772e-01, PNorm = 63.2001, GNorm = 1.9198, lr_0 = 1.5729e-04
Loss = 3.1371e-01, PNorm = 63.2034, GNorm = 1.6616, lr_0 = 1.5718e-04
Loss = 3.2286e-01, PNorm = 63.2031, GNorm = 1.4032, lr_0 = 1.5707e-04
Loss = 3.4928e-01, PNorm = 63.2015, GNorm = 1.2902, lr_0 = 1.5697e-04
Loss = 3.1621e-01, PNorm = 63.2029, GNorm = 1.2687, lr_0 = 1.5686e-04
Loss = 3.6792e-01, PNorm = 63.2043, GNorm = 1.7107, lr_0 = 1.5675e-04
Loss = 3.4489e-01, PNorm = 63.2058, GNorm = 1.6561, lr_0 = 1.5664e-04
Loss = 2.9812e-01, PNorm = 63.2081, GNorm = 1.3322, lr_0 = 1.5654e-04
Loss = 3.8157e-01, PNorm = 63.2125, GNorm = 1.9830, lr_0 = 1.5643e-04
Loss = 4.0036e-01, PNorm = 63.2145, GNorm = 2.3228, lr_0 = 1.5632e-04
Loss = 3.2672e-01, PNorm = 63.2135, GNorm = 1.8027, lr_0 = 1.5621e-04
Loss = 3.6738e-01, PNorm = 63.2149, GNorm = 1.8094, lr_0 = 1.5611e-04
Loss = 3.4011e-01, PNorm = 63.2162, GNorm = 1.3876, lr_0 = 1.5600e-04
Loss = 3.2338e-01, PNorm = 63.2197, GNorm = 1.6011, lr_0 = 1.5589e-04
Loss = 3.0779e-01, PNorm = 63.2198, GNorm = 1.1388, lr_0 = 1.5579e-04
Loss = 3.2592e-01, PNorm = 63.2228, GNorm = 1.4673, lr_0 = 1.5568e-04
Loss = 2.7915e-01, PNorm = 63.2263, GNorm = 1.5472, lr_0 = 1.5557e-04
Loss = 3.8178e-01, PNorm = 63.2284, GNorm = 2.0493, lr_0 = 1.5547e-04
Loss = 3.1171e-01, PNorm = 63.2291, GNorm = 1.1736, lr_0 = 1.5536e-04
Loss = 3.5140e-01, PNorm = 63.2317, GNorm = 1.2971, lr_0 = 1.5525e-04
Loss = 3.7641e-01, PNorm = 63.2303, GNorm = 1.3389, lr_0 = 1.5515e-04
Loss = 3.7150e-01, PNorm = 63.2288, GNorm = 1.6511, lr_0 = 1.5504e-04
Loss = 3.7281e-01, PNorm = 63.2300, GNorm = 1.8451, lr_0 = 1.5493e-04
Loss = 3.8400e-01, PNorm = 63.2324, GNorm = 1.7795, lr_0 = 1.5483e-04
Loss = 3.5189e-01, PNorm = 63.2322, GNorm = 1.7492, lr_0 = 1.5472e-04
Loss = 3.3251e-01, PNorm = 63.2346, GNorm = 1.1790, lr_0 = 1.5462e-04
Loss = 3.3872e-01, PNorm = 63.2382, GNorm = 1.2790, lr_0 = 1.5451e-04
Loss = 3.6326e-01, PNorm = 63.2398, GNorm = 1.5503, lr_0 = 1.5440e-04
Loss = 3.6627e-01, PNorm = 63.2401, GNorm = 1.6817, lr_0 = 1.5430e-04
Loss = 3.3781e-01, PNorm = 63.2398, GNorm = 1.4629, lr_0 = 1.5419e-04
Loss = 3.4415e-01, PNorm = 63.2427, GNorm = 1.2907, lr_0 = 1.5409e-04
Loss = 3.8206e-01, PNorm = 63.2449, GNorm = 1.2845, lr_0 = 1.5398e-04
Loss = 3.4752e-01, PNorm = 63.2471, GNorm = 1.5100, lr_0 = 1.5388e-04
Loss = 3.4504e-01, PNorm = 63.2496, GNorm = 2.0846, lr_0 = 1.5377e-04
Loss = 3.7003e-01, PNorm = 63.2512, GNorm = 1.1761, lr_0 = 1.5367e-04
Loss = 3.9079e-01, PNorm = 63.2516, GNorm = 1.7132, lr_0 = 1.5356e-04
Loss = 3.8765e-01, PNorm = 63.2517, GNorm = 1.6904, lr_0 = 1.5346e-04
Loss = 3.6864e-01, PNorm = 63.2533, GNorm = 1.0793, lr_0 = 1.5335e-04
Loss = 3.5091e-01, PNorm = 63.2552, GNorm = 1.7468, lr_0 = 1.5325e-04
Loss = 3.2320e-01, PNorm = 63.2559, GNorm = 1.5650, lr_0 = 1.5314e-04
Loss = 3.8680e-01, PNorm = 63.2551, GNorm = 1.7801, lr_0 = 1.5304e-04
Loss = 3.6336e-01, PNorm = 63.2564, GNorm = 1.2802, lr_0 = 1.5293e-04
Loss = 3.6884e-01, PNorm = 63.2584, GNorm = 2.3190, lr_0 = 1.5283e-04
Loss = 3.5727e-01, PNorm = 63.2579, GNorm = 1.4041, lr_0 = 1.5272e-04
Loss = 3.3812e-01, PNorm = 63.2604, GNorm = 1.2598, lr_0 = 1.5262e-04
Loss = 3.1888e-01, PNorm = 63.2630, GNorm = 1.9295, lr_0 = 1.5251e-04
Loss = 3.7728e-01, PNorm = 63.2611, GNorm = 1.3074, lr_0 = 1.5241e-04
Loss = 3.6386e-01, PNorm = 63.2632, GNorm = 1.1930, lr_0 = 1.5230e-04
Loss = 3.1712e-01, PNorm = 63.2666, GNorm = 1.6701, lr_0 = 1.5220e-04
Loss = 4.0216e-01, PNorm = 63.2655, GNorm = 1.5696, lr_0 = 1.5209e-04
Loss = 3.6179e-01, PNorm = 63.2649, GNorm = 1.7612, lr_0 = 1.5199e-04
Loss = 3.3349e-01, PNorm = 63.2668, GNorm = 2.0008, lr_0 = 1.5189e-04
Loss = 4.0358e-01, PNorm = 63.2682, GNorm = 1.1174, lr_0 = 1.5178e-04
Loss = 3.3579e-01, PNorm = 63.2699, GNorm = 1.4308, lr_0 = 1.5168e-04
Loss = 3.9156e-01, PNorm = 63.2709, GNorm = 1.6450, lr_0 = 1.5157e-04
Loss = 3.9543e-01, PNorm = 63.2723, GNorm = 1.4475, lr_0 = 1.5147e-04
Loss = 3.4835e-01, PNorm = 63.2762, GNorm = 1.2820, lr_0 = 1.5137e-04
Loss = 3.6277e-01, PNorm = 63.2794, GNorm = 1.7023, lr_0 = 1.5126e-04
Loss = 3.3265e-01, PNorm = 63.2808, GNorm = 1.7405, lr_0 = 1.5116e-04
Loss = 3.2269e-01, PNorm = 63.2824, GNorm = 1.2731, lr_0 = 1.5106e-04
Loss = 3.4025e-01, PNorm = 63.2830, GNorm = 1.5910, lr_0 = 1.5095e-04
Loss = 3.1888e-01, PNorm = 63.2839, GNorm = 1.9167, lr_0 = 1.5085e-04
Validation mae = 0.111016
Epoch 25
Loss = 3.0700e-01, PNorm = 63.2866, GNorm = 1.3035, lr_0 = 1.5075e-04
Loss = 3.6444e-01, PNorm = 63.2893, GNorm = 1.1177, lr_0 = 1.5064e-04
Loss = 3.3095e-01, PNorm = 63.2927, GNorm = 1.1814, lr_0 = 1.5054e-04
Loss = 3.4088e-01, PNorm = 63.2953, GNorm = 1.3691, lr_0 = 1.5044e-04
Loss = 3.5551e-01, PNorm = 63.2959, GNorm = 1.4559, lr_0 = 1.5033e-04
Loss = 3.5309e-01, PNorm = 63.2950, GNorm = 1.5901, lr_0 = 1.5023e-04
Loss = 3.2766e-01, PNorm = 63.2957, GNorm = 1.6087, lr_0 = 1.5013e-04
Loss = 3.3199e-01, PNorm = 63.2994, GNorm = 1.4583, lr_0 = 1.5002e-04
Loss = 3.6002e-01, PNorm = 63.3011, GNorm = 2.3556, lr_0 = 1.4992e-04
Loss = 3.7983e-01, PNorm = 63.3001, GNorm = 1.2221, lr_0 = 1.4982e-04
Loss = 3.6067e-01, PNorm = 63.3033, GNorm = 1.3169, lr_0 = 1.4972e-04
Loss = 3.6994e-01, PNorm = 63.3077, GNorm = 1.4824, lr_0 = 1.4961e-04
Loss = 3.0694e-01, PNorm = 63.3084, GNorm = 1.8976, lr_0 = 1.4951e-04
Loss = 3.5493e-01, PNorm = 63.3093, GNorm = 1.5303, lr_0 = 1.4941e-04
Loss = 3.5950e-01, PNorm = 63.3130, GNorm = 1.4321, lr_0 = 1.4931e-04
Loss = 3.4205e-01, PNorm = 63.3159, GNorm = 1.1246, lr_0 = 1.4920e-04
Loss = 3.3207e-01, PNorm = 63.3164, GNorm = 1.2090, lr_0 = 1.4910e-04
Loss = 3.5973e-01, PNorm = 63.3149, GNorm = 1.1172, lr_0 = 1.4900e-04
Loss = 4.1106e-01, PNorm = 63.3147, GNorm = 1.4359, lr_0 = 1.4890e-04
Loss = 3.6168e-01, PNorm = 63.3173, GNorm = 1.2603, lr_0 = 1.4880e-04
Loss = 3.3182e-01, PNorm = 63.3198, GNorm = 1.8666, lr_0 = 1.4869e-04
Loss = 3.9471e-01, PNorm = 63.3195, GNorm = 1.9626, lr_0 = 1.4859e-04
Loss = 3.3505e-01, PNorm = 63.3216, GNorm = 1.9129, lr_0 = 1.4849e-04
Loss = 3.3740e-01, PNorm = 63.3266, GNorm = 1.5866, lr_0 = 1.4839e-04
Loss = 3.9689e-01, PNorm = 63.3273, GNorm = 1.8241, lr_0 = 1.4829e-04
Loss = 3.3337e-01, PNorm = 63.3265, GNorm = 1.3793, lr_0 = 1.4818e-04
Loss = 3.4322e-01, PNorm = 63.3287, GNorm = 1.3447, lr_0 = 1.4808e-04
Loss = 3.5878e-01, PNorm = 63.3304, GNorm = 1.1820, lr_0 = 1.4798e-04
Loss = 3.1079e-01, PNorm = 63.3312, GNorm = 1.4312, lr_0 = 1.4788e-04
Loss = 3.2700e-01, PNorm = 63.3311, GNorm = 1.4217, lr_0 = 1.4778e-04
Loss = 3.6031e-01, PNorm = 63.3310, GNorm = 1.2573, lr_0 = 1.4768e-04
Loss = 3.2001e-01, PNorm = 63.3310, GNorm = 0.9943, lr_0 = 1.4758e-04
Loss = 3.5852e-01, PNorm = 63.3330, GNorm = 1.2114, lr_0 = 1.4748e-04
Loss = 3.4844e-01, PNorm = 63.3351, GNorm = 1.5112, lr_0 = 1.4737e-04
Loss = 3.7118e-01, PNorm = 63.3359, GNorm = 1.2027, lr_0 = 1.4727e-04
Loss = 3.7235e-01, PNorm = 63.3350, GNorm = 1.5513, lr_0 = 1.4717e-04
Loss = 3.4035e-01, PNorm = 63.3337, GNorm = 1.2030, lr_0 = 1.4707e-04
Loss = 3.7334e-01, PNorm = 63.3356, GNorm = 1.9706, lr_0 = 1.4697e-04
Loss = 3.0370e-01, PNorm = 63.3382, GNorm = 1.3198, lr_0 = 1.4687e-04
Loss = 3.7349e-01, PNorm = 63.3407, GNorm = 1.6662, lr_0 = 1.4677e-04
Loss = 3.3566e-01, PNorm = 63.3419, GNorm = 1.2243, lr_0 = 1.4667e-04
Loss = 3.6273e-01, PNorm = 63.3415, GNorm = 2.0559, lr_0 = 1.4657e-04
Loss = 3.8054e-01, PNorm = 63.3432, GNorm = 1.6080, lr_0 = 1.4647e-04
Loss = 3.8143e-01, PNorm = 63.3451, GNorm = 2.6116, lr_0 = 1.4637e-04
Loss = 3.5748e-01, PNorm = 63.3484, GNorm = 1.5188, lr_0 = 1.4627e-04
Loss = 3.3627e-01, PNorm = 63.3494, GNorm = 1.5421, lr_0 = 1.4617e-04
Loss = 3.2917e-01, PNorm = 63.3494, GNorm = 1.6734, lr_0 = 1.4607e-04
Loss = 2.9722e-01, PNorm = 63.3508, GNorm = 1.5404, lr_0 = 1.4597e-04
Loss = 3.4984e-01, PNorm = 63.3508, GNorm = 1.2600, lr_0 = 1.4587e-04
Loss = 3.6425e-01, PNorm = 63.3523, GNorm = 1.5495, lr_0 = 1.4577e-04
Loss = 3.1133e-01, PNorm = 63.3527, GNorm = 1.3406, lr_0 = 1.4567e-04
Loss = 3.7022e-01, PNorm = 63.3536, GNorm = 1.8493, lr_0 = 1.4557e-04
Loss = 3.8269e-01, PNorm = 63.3552, GNorm = 1.9689, lr_0 = 1.4547e-04
Loss = 3.5707e-01, PNorm = 63.3590, GNorm = 1.5631, lr_0 = 1.4537e-04
Loss = 3.4111e-01, PNorm = 63.3614, GNorm = 1.0669, lr_0 = 1.4527e-04
Loss = 3.0566e-01, PNorm = 63.3620, GNorm = 1.0595, lr_0 = 1.4517e-04
Loss = 3.4601e-01, PNorm = 63.3630, GNorm = 1.3569, lr_0 = 1.4507e-04
Loss = 3.3842e-01, PNorm = 63.3659, GNorm = 1.2780, lr_0 = 1.4497e-04
Loss = 3.8059e-01, PNorm = 63.3683, GNorm = 1.4250, lr_0 = 1.4487e-04
Loss = 3.2699e-01, PNorm = 63.3707, GNorm = 1.4204, lr_0 = 1.4477e-04
Loss = 3.1032e-01, PNorm = 63.3701, GNorm = 1.7184, lr_0 = 1.4467e-04
Loss = 3.4675e-01, PNorm = 63.3711, GNorm = 1.5740, lr_0 = 1.4457e-04
Loss = 4.0682e-01, PNorm = 63.3723, GNorm = 1.6357, lr_0 = 1.4447e-04
Loss = 3.4126e-01, PNorm = 63.3739, GNorm = 1.2266, lr_0 = 1.4438e-04
Loss = 3.4030e-01, PNorm = 63.3758, GNorm = 1.4767, lr_0 = 1.4428e-04
Loss = 3.4433e-01, PNorm = 63.3772, GNorm = 1.9163, lr_0 = 1.4418e-04
Loss = 3.3090e-01, PNorm = 63.3790, GNorm = 1.2276, lr_0 = 1.4408e-04
Loss = 3.7556e-01, PNorm = 63.3787, GNorm = 1.3296, lr_0 = 1.4398e-04
Loss = 3.5514e-01, PNorm = 63.3819, GNorm = 1.6764, lr_0 = 1.4388e-04
Loss = 3.0589e-01, PNorm = 63.3847, GNorm = 1.0460, lr_0 = 1.4378e-04
Loss = 3.8697e-01, PNorm = 63.3866, GNorm = 1.6477, lr_0 = 1.4368e-04
Loss = 3.5491e-01, PNorm = 63.3869, GNorm = 1.3504, lr_0 = 1.4359e-04
Loss = 3.1294e-01, PNorm = 63.3883, GNorm = 1.7047, lr_0 = 1.4349e-04
Loss = 3.3466e-01, PNorm = 63.3880, GNorm = 1.2899, lr_0 = 1.4339e-04
Loss = 4.0474e-01, PNorm = 63.3895, GNorm = 1.3662, lr_0 = 1.4329e-04
Loss = 3.5373e-01, PNorm = 63.3904, GNorm = 1.7402, lr_0 = 1.4319e-04
Loss = 3.4273e-01, PNorm = 63.3915, GNorm = 1.6337, lr_0 = 1.4310e-04
Loss = 3.4017e-01, PNorm = 63.3904, GNorm = 1.3241, lr_0 = 1.4300e-04
Loss = 3.0670e-01, PNorm = 63.3910, GNorm = 1.2939, lr_0 = 1.4290e-04
Loss = 3.2838e-01, PNorm = 63.3920, GNorm = 1.2877, lr_0 = 1.4280e-04
Loss = 3.7614e-01, PNorm = 63.3946, GNorm = 1.5148, lr_0 = 1.4270e-04
Loss = 3.4706e-01, PNorm = 63.3957, GNorm = 1.2139, lr_0 = 1.4261e-04
Loss = 3.1061e-01, PNorm = 63.3988, GNorm = 1.8127, lr_0 = 1.4251e-04
Loss = 3.5019e-01, PNorm = 63.3991, GNorm = 1.9632, lr_0 = 1.4241e-04
Loss = 4.1818e-01, PNorm = 63.4007, GNorm = 1.4850, lr_0 = 1.4231e-04
Loss = 3.7042e-01, PNorm = 63.4036, GNorm = 1.7572, lr_0 = 1.4222e-04
Loss = 3.6611e-01, PNorm = 63.4055, GNorm = 2.4867, lr_0 = 1.4212e-04
Loss = 3.6516e-01, PNorm = 63.4071, GNorm = 1.6629, lr_0 = 1.4202e-04
Loss = 3.5406e-01, PNorm = 63.4088, GNorm = 1.6234, lr_0 = 1.4192e-04
Loss = 3.2347e-01, PNorm = 63.4085, GNorm = 1.3637, lr_0 = 1.4183e-04
Loss = 3.7213e-01, PNorm = 63.4085, GNorm = 1.7353, lr_0 = 1.4173e-04
Loss = 3.7673e-01, PNorm = 63.4085, GNorm = 1.5965, lr_0 = 1.4163e-04
Loss = 3.8166e-01, PNorm = 63.4077, GNorm = 1.3371, lr_0 = 1.4153e-04
Loss = 3.1998e-01, PNorm = 63.4086, GNorm = 1.9529, lr_0 = 1.4144e-04
Loss = 3.5735e-01, PNorm = 63.4094, GNorm = 1.4278, lr_0 = 1.4134e-04
Loss = 3.0101e-01, PNorm = 63.4107, GNorm = 1.2406, lr_0 = 1.4124e-04
Loss = 3.5022e-01, PNorm = 63.4105, GNorm = 1.1925, lr_0 = 1.4115e-04
Loss = 3.4399e-01, PNorm = 63.4119, GNorm = 2.0089, lr_0 = 1.4105e-04
Loss = 3.2994e-01, PNorm = 63.4147, GNorm = 1.2763, lr_0 = 1.4095e-04
Loss = 3.4300e-01, PNorm = 63.4172, GNorm = 1.1285, lr_0 = 1.4086e-04
Loss = 4.1255e-01, PNorm = 63.4191, GNorm = 1.4537, lr_0 = 1.4076e-04
Loss = 3.4400e-01, PNorm = 63.4218, GNorm = 1.5774, lr_0 = 1.4066e-04
Loss = 3.6666e-01, PNorm = 63.4226, GNorm = 1.6953, lr_0 = 1.4057e-04
Loss = 3.5428e-01, PNorm = 63.4232, GNorm = 1.6637, lr_0 = 1.4047e-04
Loss = 3.5013e-01, PNorm = 63.4235, GNorm = 1.1049, lr_0 = 1.4038e-04
Loss = 3.4814e-01, PNorm = 63.4244, GNorm = 1.4082, lr_0 = 1.4028e-04
Loss = 3.7778e-01, PNorm = 63.4251, GNorm = 1.9634, lr_0 = 1.4018e-04
Loss = 3.6571e-01, PNorm = 63.4274, GNorm = 1.7722, lr_0 = 1.4009e-04
Loss = 3.4063e-01, PNorm = 63.4305, GNorm = 2.0242, lr_0 = 1.3999e-04
Loss = 3.2194e-01, PNorm = 63.4313, GNorm = 1.4103, lr_0 = 1.3990e-04
Loss = 3.5134e-01, PNorm = 63.4327, GNorm = 1.1301, lr_0 = 1.3980e-04
Loss = 3.3991e-01, PNorm = 63.4347, GNorm = 1.3719, lr_0 = 1.3970e-04
Loss = 3.4424e-01, PNorm = 63.4355, GNorm = 1.5992, lr_0 = 1.3961e-04
Loss = 4.2477e-01, PNorm = 63.4356, GNorm = 2.6543, lr_0 = 1.3951e-04
Loss = 3.5476e-01, PNorm = 63.4362, GNorm = 1.6167, lr_0 = 1.3942e-04
Loss = 3.2674e-01, PNorm = 63.4386, GNorm = 1.3277, lr_0 = 1.3932e-04
Loss = 3.4581e-01, PNorm = 63.4395, GNorm = 1.6913, lr_0 = 1.3923e-04
Loss = 3.3957e-01, PNorm = 63.4414, GNorm = 2.1052, lr_0 = 1.3913e-04
Loss = 3.7475e-01, PNorm = 63.4423, GNorm = 1.6241, lr_0 = 1.3904e-04
Loss = 3.0870e-01, PNorm = 63.4434, GNorm = 1.5768, lr_0 = 1.3894e-04
Validation mae = 0.112486
Epoch 26
Loss = 3.7231e-01, PNorm = 63.4461, GNorm = 1.3518, lr_0 = 1.3884e-04
Loss = 3.6287e-01, PNorm = 63.4475, GNorm = 1.4328, lr_0 = 1.3875e-04
Loss = 3.6191e-01, PNorm = 63.4472, GNorm = 1.8363, lr_0 = 1.3865e-04
Loss = 3.2911e-01, PNorm = 63.4479, GNorm = 1.4019, lr_0 = 1.3856e-04
Loss = 3.0624e-01, PNorm = 63.4496, GNorm = 1.5653, lr_0 = 1.3846e-04
Loss = 3.6697e-01, PNorm = 63.4505, GNorm = 2.4782, lr_0 = 1.3837e-04
Loss = 2.8915e-01, PNorm = 63.4516, GNorm = 1.1281, lr_0 = 1.3828e-04
Loss = 3.5603e-01, PNorm = 63.4549, GNorm = 1.3348, lr_0 = 1.3818e-04
Loss = 3.2898e-01, PNorm = 63.4558, GNorm = 1.5364, lr_0 = 1.3809e-04
Loss = 3.8142e-01, PNorm = 63.4557, GNorm = 1.4196, lr_0 = 1.3799e-04
Loss = 3.6894e-01, PNorm = 63.4586, GNorm = 1.1317, lr_0 = 1.3790e-04
Loss = 3.7205e-01, PNorm = 63.4594, GNorm = 1.5391, lr_0 = 1.3780e-04
Loss = 3.1642e-01, PNorm = 63.4586, GNorm = 1.4658, lr_0 = 1.3771e-04
Loss = 3.6517e-01, PNorm = 63.4611, GNorm = 1.2611, lr_0 = 1.3761e-04
Loss = 3.7332e-01, PNorm = 63.4629, GNorm = 1.8461, lr_0 = 1.3752e-04
Loss = 3.4821e-01, PNorm = 63.4630, GNorm = 1.6045, lr_0 = 1.3742e-04
Loss = 3.2961e-01, PNorm = 63.4640, GNorm = 1.3841, lr_0 = 1.3733e-04
Loss = 3.6079e-01, PNorm = 63.4651, GNorm = 1.7911, lr_0 = 1.3724e-04
Loss = 3.0223e-01, PNorm = 63.4652, GNorm = 1.4422, lr_0 = 1.3714e-04
Loss = 3.3784e-01, PNorm = 63.4650, GNorm = 1.6382, lr_0 = 1.3705e-04
Loss = 3.9810e-01, PNorm = 63.4636, GNorm = 1.7017, lr_0 = 1.3695e-04
Loss = 3.5862e-01, PNorm = 63.4656, GNorm = 1.6954, lr_0 = 1.3686e-04
Loss = 3.6802e-01, PNorm = 63.4692, GNorm = 0.9516, lr_0 = 1.3677e-04
Loss = 3.4775e-01, PNorm = 63.4709, GNorm = 1.6039, lr_0 = 1.3667e-04
Loss = 3.4704e-01, PNorm = 63.4736, GNorm = 1.4786, lr_0 = 1.3658e-04
Loss = 3.3026e-01, PNorm = 63.4765, GNorm = 1.2132, lr_0 = 1.3649e-04
Loss = 3.3833e-01, PNorm = 63.4779, GNorm = 1.5871, lr_0 = 1.3639e-04
Loss = 3.2913e-01, PNorm = 63.4797, GNorm = 1.3595, lr_0 = 1.3630e-04
Loss = 3.4982e-01, PNorm = 63.4819, GNorm = 1.9537, lr_0 = 1.3621e-04
Loss = 3.2175e-01, PNorm = 63.4826, GNorm = 1.1651, lr_0 = 1.3611e-04
Loss = 3.6182e-01, PNorm = 63.4820, GNorm = 1.6742, lr_0 = 1.3602e-04
Loss = 3.7974e-01, PNorm = 63.4835, GNorm = 1.4551, lr_0 = 1.3593e-04
Loss = 3.4736e-01, PNorm = 63.4868, GNorm = 1.4914, lr_0 = 1.3583e-04
Loss = 3.3585e-01, PNorm = 63.4880, GNorm = 2.4550, lr_0 = 1.3574e-04
Loss = 3.9170e-01, PNorm = 63.4887, GNorm = 1.7342, lr_0 = 1.3565e-04
Loss = 3.1056e-01, PNorm = 63.4888, GNorm = 1.2176, lr_0 = 1.3555e-04
Loss = 3.3227e-01, PNorm = 63.4899, GNorm = 1.4159, lr_0 = 1.3546e-04
Loss = 3.3070e-01, PNorm = 63.4913, GNorm = 1.5438, lr_0 = 1.3537e-04
Loss = 2.8068e-01, PNorm = 63.4942, GNorm = 1.0156, lr_0 = 1.3528e-04
Loss = 3.6189e-01, PNorm = 63.4949, GNorm = 1.5371, lr_0 = 1.3518e-04
Loss = 3.6249e-01, PNorm = 63.4954, GNorm = 1.1982, lr_0 = 1.3509e-04
Loss = 3.8467e-01, PNorm = 63.4955, GNorm = 1.9340, lr_0 = 1.3500e-04
Loss = 3.5607e-01, PNorm = 63.4968, GNorm = 1.5643, lr_0 = 1.3491e-04
Loss = 3.0679e-01, PNorm = 63.4999, GNorm = 1.4807, lr_0 = 1.3481e-04
Loss = 3.3617e-01, PNorm = 63.5003, GNorm = 1.6349, lr_0 = 1.3472e-04
Loss = 3.9025e-01, PNorm = 63.4991, GNorm = 1.3699, lr_0 = 1.3463e-04
Loss = 3.5357e-01, PNorm = 63.5006, GNorm = 1.3012, lr_0 = 1.3454e-04
Loss = 3.6392e-01, PNorm = 63.5009, GNorm = 1.1508, lr_0 = 1.3444e-04
Loss = 3.1605e-01, PNorm = 63.5020, GNorm = 1.4511, lr_0 = 1.3435e-04
Loss = 3.3540e-01, PNorm = 63.5041, GNorm = 1.8341, lr_0 = 1.3426e-04
Loss = 3.4617e-01, PNorm = 63.5053, GNorm = 1.3874, lr_0 = 1.3417e-04
Loss = 3.5990e-01, PNorm = 63.5055, GNorm = 2.2035, lr_0 = 1.3408e-04
Loss = 3.2642e-01, PNorm = 63.5080, GNorm = 1.5043, lr_0 = 1.3398e-04
Loss = 2.7910e-01, PNorm = 63.5102, GNorm = 1.4661, lr_0 = 1.3389e-04
Loss = 3.4110e-01, PNorm = 63.5102, GNorm = 2.0035, lr_0 = 1.3380e-04
Loss = 3.5811e-01, PNorm = 63.5108, GNorm = 1.3906, lr_0 = 1.3371e-04
Loss = 3.4552e-01, PNorm = 63.5117, GNorm = 1.2018, lr_0 = 1.3362e-04
Loss = 3.8414e-01, PNorm = 63.5106, GNorm = 1.9463, lr_0 = 1.3353e-04
Loss = 3.4921e-01, PNorm = 63.5112, GNorm = 1.6071, lr_0 = 1.3343e-04
Loss = 3.8152e-01, PNorm = 63.5125, GNorm = 1.0872, lr_0 = 1.3334e-04
Loss = 3.6108e-01, PNorm = 63.5155, GNorm = 1.8678, lr_0 = 1.3325e-04
Loss = 3.3153e-01, PNorm = 63.5179, GNorm = 1.1493, lr_0 = 1.3316e-04
Loss = 3.4168e-01, PNorm = 63.5162, GNorm = 1.4607, lr_0 = 1.3307e-04
Loss = 3.7276e-01, PNorm = 63.5156, GNorm = 1.6917, lr_0 = 1.3298e-04
Loss = 3.4532e-01, PNorm = 63.5176, GNorm = 0.7618, lr_0 = 1.3289e-04
Loss = 3.5592e-01, PNorm = 63.5201, GNorm = 1.8237, lr_0 = 1.3280e-04
Loss = 3.8052e-01, PNorm = 63.5213, GNorm = 3.0097, lr_0 = 1.3270e-04
Loss = 3.4153e-01, PNorm = 63.5219, GNorm = 1.1249, lr_0 = 1.3261e-04
Loss = 2.8084e-01, PNorm = 63.5246, GNorm = 1.0669, lr_0 = 1.3252e-04
Loss = 3.5426e-01, PNorm = 63.5254, GNorm = 1.6428, lr_0 = 1.3243e-04
Loss = 3.6265e-01, PNorm = 63.5237, GNorm = 1.0784, lr_0 = 1.3234e-04
Loss = 3.1415e-01, PNorm = 63.5244, GNorm = 1.1024, lr_0 = 1.3225e-04
Loss = 3.5076e-01, PNorm = 63.5276, GNorm = 1.7927, lr_0 = 1.3216e-04
Loss = 3.5425e-01, PNorm = 63.5289, GNorm = 1.8273, lr_0 = 1.3207e-04
Loss = 3.6063e-01, PNorm = 63.5287, GNorm = 1.1296, lr_0 = 1.3198e-04
Loss = 3.9747e-01, PNorm = 63.5297, GNorm = 1.7999, lr_0 = 1.3189e-04
Loss = 3.6416e-01, PNorm = 63.5321, GNorm = 1.9837, lr_0 = 1.3180e-04
Loss = 3.9407e-01, PNorm = 63.5347, GNorm = 1.5800, lr_0 = 1.3171e-04
Loss = 3.2153e-01, PNorm = 63.5366, GNorm = 1.6475, lr_0 = 1.3162e-04
Loss = 3.2148e-01, PNorm = 63.5381, GNorm = 1.4256, lr_0 = 1.3153e-04
Loss = 3.4238e-01, PNorm = 63.5399, GNorm = 1.2658, lr_0 = 1.3144e-04
Loss = 3.3769e-01, PNorm = 63.5419, GNorm = 1.2411, lr_0 = 1.3135e-04
Loss = 3.8825e-01, PNorm = 63.5443, GNorm = 1.3482, lr_0 = 1.3126e-04
Loss = 3.5182e-01, PNorm = 63.5454, GNorm = 1.5523, lr_0 = 1.3117e-04
Loss = 3.1194e-01, PNorm = 63.5464, GNorm = 0.9844, lr_0 = 1.3108e-04
Loss = 3.5834e-01, PNorm = 63.5468, GNorm = 1.4194, lr_0 = 1.3099e-04
Loss = 3.4561e-01, PNorm = 63.5485, GNorm = 1.6390, lr_0 = 1.3090e-04
Loss = 3.2927e-01, PNorm = 63.5489, GNorm = 1.8344, lr_0 = 1.3081e-04
Loss = 3.2129e-01, PNorm = 63.5513, GNorm = 1.5968, lr_0 = 1.3072e-04
Loss = 3.4053e-01, PNorm = 63.5523, GNorm = 1.2339, lr_0 = 1.3063e-04
Loss = 3.5630e-01, PNorm = 63.5528, GNorm = 1.7383, lr_0 = 1.3054e-04
Loss = 3.5678e-01, PNorm = 63.5532, GNorm = 0.9759, lr_0 = 1.3045e-04
Loss = 3.3355e-01, PNorm = 63.5531, GNorm = 1.2216, lr_0 = 1.3036e-04
Loss = 3.7541e-01, PNorm = 63.5529, GNorm = 1.8500, lr_0 = 1.3027e-04
Loss = 3.5058e-01, PNorm = 63.5556, GNorm = 1.6850, lr_0 = 1.3018e-04
Loss = 3.5203e-01, PNorm = 63.5575, GNorm = 1.5667, lr_0 = 1.3009e-04
Loss = 2.7339e-01, PNorm = 63.5588, GNorm = 1.4786, lr_0 = 1.3000e-04
Loss = 3.2511e-01, PNorm = 63.5599, GNorm = 1.1747, lr_0 = 1.2992e-04
Loss = 3.2447e-01, PNorm = 63.5596, GNorm = 1.0599, lr_0 = 1.2983e-04
Loss = 3.7303e-01, PNorm = 63.5599, GNorm = 1.9949, lr_0 = 1.2974e-04
Loss = 3.2465e-01, PNorm = 63.5616, GNorm = 1.0814, lr_0 = 1.2965e-04
Loss = 3.3616e-01, PNorm = 63.5623, GNorm = 1.6651, lr_0 = 1.2956e-04
Loss = 3.6144e-01, PNorm = 63.5636, GNorm = 1.5394, lr_0 = 1.2947e-04
Loss = 3.3000e-01, PNorm = 63.5650, GNorm = 1.3246, lr_0 = 1.2938e-04
Loss = 3.8689e-01, PNorm = 63.5625, GNorm = 1.6667, lr_0 = 1.2929e-04
Loss = 3.6449e-01, PNorm = 63.5634, GNorm = 1.8555, lr_0 = 1.2921e-04
Loss = 3.6312e-01, PNorm = 63.5658, GNorm = 1.4825, lr_0 = 1.2912e-04
Loss = 4.0250e-01, PNorm = 63.5672, GNorm = 1.5144, lr_0 = 1.2903e-04
Loss = 3.4855e-01, PNorm = 63.5706, GNorm = 1.1186, lr_0 = 1.2894e-04
Loss = 3.9806e-01, PNorm = 63.5744, GNorm = 1.3162, lr_0 = 1.2885e-04
Loss = 3.0616e-01, PNorm = 63.5753, GNorm = 1.1980, lr_0 = 1.2876e-04
Loss = 3.8875e-01, PNorm = 63.5741, GNorm = 1.2283, lr_0 = 1.2867e-04
Loss = 3.6137e-01, PNorm = 63.5732, GNorm = 2.1541, lr_0 = 1.2859e-04
Loss = 3.4079e-01, PNorm = 63.5749, GNorm = 1.0771, lr_0 = 1.2850e-04
Loss = 3.3683e-01, PNorm = 63.5761, GNorm = 2.0327, lr_0 = 1.2841e-04
Loss = 3.4312e-01, PNorm = 63.5779, GNorm = 1.5905, lr_0 = 1.2832e-04
Loss = 3.2477e-01, PNorm = 63.5807, GNorm = 1.4405, lr_0 = 1.2823e-04
Loss = 3.6395e-01, PNorm = 63.5818, GNorm = 1.8352, lr_0 = 1.2815e-04
Loss = 3.3924e-01, PNorm = 63.5825, GNorm = 1.6135, lr_0 = 1.2806e-04
Loss = 4.2349e-01, PNorm = 63.5850, GNorm = 1.2912, lr_0 = 1.2797e-04
Validation mae = 0.111335
Epoch 27
Loss = 3.5356e-01, PNorm = 63.5847, GNorm = 1.3321, lr_0 = 1.2788e-04
Loss = 2.9361e-01, PNorm = 63.5861, GNorm = 1.2621, lr_0 = 1.2780e-04
Loss = 3.6377e-01, PNorm = 63.5874, GNorm = 1.7886, lr_0 = 1.2771e-04
Loss = 3.3024e-01, PNorm = 63.5893, GNorm = 1.6437, lr_0 = 1.2762e-04
Loss = 3.1039e-01, PNorm = 63.5899, GNorm = 1.3491, lr_0 = 1.2753e-04
Loss = 3.4223e-01, PNorm = 63.5917, GNorm = 1.2492, lr_0 = 1.2745e-04
Loss = 3.3497e-01, PNorm = 63.5936, GNorm = 1.5386, lr_0 = 1.2736e-04
Loss = 2.8437e-01, PNorm = 63.5946, GNorm = 1.3968, lr_0 = 1.2727e-04
Loss = 3.5085e-01, PNorm = 63.5960, GNorm = 1.4681, lr_0 = 1.2718e-04
Loss = 3.3801e-01, PNorm = 63.5970, GNorm = 1.9987, lr_0 = 1.2710e-04
Loss = 3.7497e-01, PNorm = 63.5990, GNorm = 1.7438, lr_0 = 1.2701e-04
Loss = 2.9177e-01, PNorm = 63.6010, GNorm = 1.8288, lr_0 = 1.2692e-04
Loss = 3.4022e-01, PNorm = 63.6003, GNorm = 1.3749, lr_0 = 1.2684e-04
Loss = 3.4444e-01, PNorm = 63.6012, GNorm = 1.6626, lr_0 = 1.2675e-04
Loss = 3.3227e-01, PNorm = 63.6028, GNorm = 2.0332, lr_0 = 1.2666e-04
Loss = 3.8792e-01, PNorm = 63.6047, GNorm = 1.6614, lr_0 = 1.2658e-04
Loss = 3.2880e-01, PNorm = 63.6055, GNorm = 1.5968, lr_0 = 1.2649e-04
Loss = 3.2936e-01, PNorm = 63.6062, GNorm = 1.7323, lr_0 = 1.2640e-04
Loss = 3.2897e-01, PNorm = 63.6070, GNorm = 1.7422, lr_0 = 1.2632e-04
Loss = 3.1897e-01, PNorm = 63.6088, GNorm = 1.2901, lr_0 = 1.2623e-04
Loss = 3.1789e-01, PNorm = 63.6102, GNorm = 1.3347, lr_0 = 1.2614e-04
Loss = 3.4792e-01, PNorm = 63.6090, GNorm = 1.7292, lr_0 = 1.2606e-04
Loss = 3.7277e-01, PNorm = 63.6091, GNorm = 1.3893, lr_0 = 1.2597e-04
Loss = 3.2174e-01, PNorm = 63.6106, GNorm = 1.3182, lr_0 = 1.2588e-04
Loss = 3.4768e-01, PNorm = 63.6138, GNorm = 2.0943, lr_0 = 1.2580e-04
Loss = 3.2385e-01, PNorm = 63.6140, GNorm = 1.7308, lr_0 = 1.2571e-04
Loss = 3.0001e-01, PNorm = 63.6141, GNorm = 1.7435, lr_0 = 1.2563e-04
Loss = 3.3656e-01, PNorm = 63.6143, GNorm = 1.3708, lr_0 = 1.2554e-04
Loss = 3.5848e-01, PNorm = 63.6142, GNorm = 2.0523, lr_0 = 1.2545e-04
Loss = 3.6200e-01, PNorm = 63.6148, GNorm = 1.2905, lr_0 = 1.2537e-04
Loss = 3.6759e-01, PNorm = 63.6160, GNorm = 1.4811, lr_0 = 1.2528e-04
Loss = 3.1353e-01, PNorm = 63.6183, GNorm = 1.8595, lr_0 = 1.2520e-04
Loss = 3.6692e-01, PNorm = 63.6218, GNorm = 1.1525, lr_0 = 1.2511e-04
Loss = 3.4845e-01, PNorm = 63.6247, GNorm = 1.2742, lr_0 = 1.2502e-04
Loss = 3.4001e-01, PNorm = 63.6259, GNorm = 1.9187, lr_0 = 1.2494e-04
Loss = 3.6018e-01, PNorm = 63.6285, GNorm = 1.6172, lr_0 = 1.2485e-04
Loss = 3.7156e-01, PNorm = 63.6301, GNorm = 1.0803, lr_0 = 1.2477e-04
Loss = 3.7523e-01, PNorm = 63.6299, GNorm = 1.3605, lr_0 = 1.2468e-04
Loss = 2.9652e-01, PNorm = 63.6311, GNorm = 1.4813, lr_0 = 1.2460e-04
Loss = 3.2389e-01, PNorm = 63.6311, GNorm = 1.1427, lr_0 = 1.2451e-04
Loss = 3.1760e-01, PNorm = 63.6311, GNorm = 1.8500, lr_0 = 1.2443e-04
Loss = 3.2588e-01, PNorm = 63.6321, GNorm = 1.5589, lr_0 = 1.2434e-04
Loss = 3.7589e-01, PNorm = 63.6327, GNorm = 2.6096, lr_0 = 1.2426e-04
Loss = 3.3328e-01, PNorm = 63.6323, GNorm = 1.3748, lr_0 = 1.2417e-04
Loss = 3.7049e-01, PNorm = 63.6329, GNorm = 1.4258, lr_0 = 1.2409e-04
Loss = 3.2771e-01, PNorm = 63.6348, GNorm = 1.7856, lr_0 = 1.2400e-04
Loss = 3.1101e-01, PNorm = 63.6372, GNorm = 1.4659, lr_0 = 1.2392e-04
Loss = 3.3583e-01, PNorm = 63.6380, GNorm = 1.6860, lr_0 = 1.2383e-04
Loss = 3.1079e-01, PNorm = 63.6376, GNorm = 1.1424, lr_0 = 1.2375e-04
Loss = 3.2806e-01, PNorm = 63.6385, GNorm = 1.4676, lr_0 = 1.2366e-04
Loss = 3.4842e-01, PNorm = 63.6402, GNorm = 1.5412, lr_0 = 1.2358e-04
Loss = 4.2368e-01, PNorm = 63.6395, GNorm = 1.7364, lr_0 = 1.2349e-04
Loss = 3.2442e-01, PNorm = 63.6392, GNorm = 1.2262, lr_0 = 1.2341e-04
Loss = 3.4753e-01, PNorm = 63.6400, GNorm = 1.9026, lr_0 = 1.2332e-04
Loss = 3.5600e-01, PNorm = 63.6415, GNorm = 2.0704, lr_0 = 1.2324e-04
Loss = 3.5295e-01, PNorm = 63.6423, GNorm = 1.0135, lr_0 = 1.2315e-04
Loss = 4.0487e-01, PNorm = 63.6430, GNorm = 1.6566, lr_0 = 1.2307e-04
Loss = 3.3759e-01, PNorm = 63.6456, GNorm = 1.5885, lr_0 = 1.2298e-04
Loss = 3.4141e-01, PNorm = 63.6473, GNorm = 1.3463, lr_0 = 1.2290e-04
Loss = 3.3840e-01, PNorm = 63.6474, GNorm = 1.4524, lr_0 = 1.2282e-04
Loss = 3.1470e-01, PNorm = 63.6500, GNorm = 1.7805, lr_0 = 1.2273e-04
Loss = 3.4444e-01, PNorm = 63.6520, GNorm = 1.1688, lr_0 = 1.2265e-04
Loss = 3.4245e-01, PNorm = 63.6527, GNorm = 1.2842, lr_0 = 1.2256e-04
Loss = 3.3577e-01, PNorm = 63.6525, GNorm = 1.1516, lr_0 = 1.2248e-04
Loss = 3.6300e-01, PNorm = 63.6525, GNorm = 1.2932, lr_0 = 1.2240e-04
Loss = 3.6885e-01, PNorm = 63.6526, GNorm = 1.6387, lr_0 = 1.2231e-04
Loss = 3.3349e-01, PNorm = 63.6546, GNorm = 1.3530, lr_0 = 1.2223e-04
Loss = 3.3736e-01, PNorm = 63.6546, GNorm = 1.3791, lr_0 = 1.2214e-04
Loss = 4.4503e-01, PNorm = 63.6564, GNorm = 1.2896, lr_0 = 1.2206e-04
Loss = 3.7486e-01, PNorm = 63.6579, GNorm = 1.1842, lr_0 = 1.2198e-04
Loss = 4.0075e-01, PNorm = 63.6586, GNorm = 1.4334, lr_0 = 1.2189e-04
Loss = 3.9386e-01, PNorm = 63.6600, GNorm = 1.3016, lr_0 = 1.2181e-04
Loss = 3.2168e-01, PNorm = 63.6618, GNorm = 1.5484, lr_0 = 1.2173e-04
Loss = 3.5284e-01, PNorm = 63.6635, GNorm = 1.6354, lr_0 = 1.2164e-04
Loss = 3.4686e-01, PNorm = 63.6636, GNorm = 1.8837, lr_0 = 1.2156e-04
Loss = 3.5032e-01, PNorm = 63.6659, GNorm = 1.8766, lr_0 = 1.2148e-04
Loss = 3.1902e-01, PNorm = 63.6681, GNorm = 1.2106, lr_0 = 1.2139e-04
Loss = 3.5047e-01, PNorm = 63.6672, GNorm = 1.6382, lr_0 = 1.2131e-04
Loss = 3.7942e-01, PNorm = 63.6678, GNorm = 1.5335, lr_0 = 1.2123e-04
Loss = 3.5796e-01, PNorm = 63.6689, GNorm = 2.0484, lr_0 = 1.2114e-04
Loss = 3.5405e-01, PNorm = 63.6699, GNorm = 1.3696, lr_0 = 1.2106e-04
Loss = 3.3851e-01, PNorm = 63.6708, GNorm = 2.0529, lr_0 = 1.2098e-04
Loss = 3.7527e-01, PNorm = 63.6714, GNorm = 1.5600, lr_0 = 1.2090e-04
Loss = 3.2971e-01, PNorm = 63.6713, GNorm = 1.1421, lr_0 = 1.2081e-04
Loss = 3.6158e-01, PNorm = 63.6734, GNorm = 1.5723, lr_0 = 1.2073e-04
Loss = 3.4678e-01, PNorm = 63.6761, GNorm = 1.0836, lr_0 = 1.2065e-04
Loss = 3.6595e-01, PNorm = 63.6788, GNorm = 1.6210, lr_0 = 1.2056e-04
Loss = 3.0351e-01, PNorm = 63.6791, GNorm = 1.3245, lr_0 = 1.2048e-04
Loss = 3.4967e-01, PNorm = 63.6787, GNorm = 1.9526, lr_0 = 1.2040e-04
Loss = 3.8566e-01, PNorm = 63.6796, GNorm = 1.5178, lr_0 = 1.2032e-04
Loss = 3.3856e-01, PNorm = 63.6813, GNorm = 1.4345, lr_0 = 1.2023e-04
Loss = 3.1996e-01, PNorm = 63.6815, GNorm = 1.4487, lr_0 = 1.2015e-04
Loss = 3.5555e-01, PNorm = 63.6828, GNorm = 1.7319, lr_0 = 1.2007e-04
Loss = 3.4770e-01, PNorm = 63.6849, GNorm = 1.3556, lr_0 = 1.1999e-04
Loss = 3.4524e-01, PNorm = 63.6854, GNorm = 1.6859, lr_0 = 1.1991e-04
Loss = 3.6420e-01, PNorm = 63.6848, GNorm = 1.9753, lr_0 = 1.1982e-04
Loss = 3.3078e-01, PNorm = 63.6863, GNorm = 1.5056, lr_0 = 1.1974e-04
Loss = 3.5644e-01, PNorm = 63.6878, GNorm = 1.3521, lr_0 = 1.1966e-04
Loss = 3.6172e-01, PNorm = 63.6888, GNorm = 1.4798, lr_0 = 1.1958e-04
Loss = 3.4461e-01, PNorm = 63.6886, GNorm = 1.7420, lr_0 = 1.1950e-04
Loss = 3.3634e-01, PNorm = 63.6897, GNorm = 1.4309, lr_0 = 1.1941e-04
Loss = 3.0639e-01, PNorm = 63.6893, GNorm = 1.0965, lr_0 = 1.1933e-04
Loss = 3.9611e-01, PNorm = 63.6885, GNorm = 1.6912, lr_0 = 1.1925e-04
Loss = 3.4581e-01, PNorm = 63.6902, GNorm = 1.5236, lr_0 = 1.1917e-04
Loss = 3.4788e-01, PNorm = 63.6916, GNorm = 1.7497, lr_0 = 1.1909e-04
Loss = 4.2004e-01, PNorm = 63.6926, GNorm = 1.4868, lr_0 = 1.1901e-04
Loss = 3.6027e-01, PNorm = 63.6940, GNorm = 1.5143, lr_0 = 1.1892e-04
Loss = 3.1465e-01, PNorm = 63.6958, GNorm = 1.4694, lr_0 = 1.1884e-04
Loss = 3.4580e-01, PNorm = 63.6959, GNorm = 1.2408, lr_0 = 1.1876e-04
Loss = 3.4261e-01, PNorm = 63.6971, GNorm = 1.6179, lr_0 = 1.1868e-04
Loss = 3.8686e-01, PNorm = 63.6988, GNorm = 1.5488, lr_0 = 1.1860e-04
Loss = 3.3637e-01, PNorm = 63.6988, GNorm = 1.7340, lr_0 = 1.1852e-04
Loss = 3.5973e-01, PNorm = 63.6989, GNorm = 1.3839, lr_0 = 1.1844e-04
Loss = 3.4393e-01, PNorm = 63.6996, GNorm = 1.5575, lr_0 = 1.1835e-04
Loss = 3.5734e-01, PNorm = 63.6994, GNorm = 1.5574, lr_0 = 1.1827e-04
Loss = 3.5716e-01, PNorm = 63.6978, GNorm = 1.3652, lr_0 = 1.1819e-04
Loss = 3.2399e-01, PNorm = 63.7011, GNorm = 1.2657, lr_0 = 1.1811e-04
Loss = 3.5463e-01, PNorm = 63.7016, GNorm = 0.8839, lr_0 = 1.1803e-04
Loss = 3.7179e-01, PNorm = 63.7019, GNorm = 1.6421, lr_0 = 1.1795e-04
Loss = 3.7299e-01, PNorm = 63.7041, GNorm = 1.2486, lr_0 = 1.1787e-04
Validation mae = 0.110870
Epoch 28
Loss = 3.6385e-01, PNorm = 63.7045, GNorm = 1.5301, lr_0 = 1.1779e-04
Loss = 3.4256e-01, PNorm = 63.7053, GNorm = 1.2675, lr_0 = 1.1771e-04
Loss = 4.0432e-01, PNorm = 63.7059, GNorm = 1.8425, lr_0 = 1.1763e-04
Loss = 3.4703e-01, PNorm = 63.7081, GNorm = 1.6510, lr_0 = 1.1755e-04
Loss = 3.1289e-01, PNorm = 63.7105, GNorm = 1.3498, lr_0 = 1.1747e-04
Loss = 3.4493e-01, PNorm = 63.7131, GNorm = 1.0315, lr_0 = 1.1739e-04
Loss = 3.6165e-01, PNorm = 63.7127, GNorm = 1.1896, lr_0 = 1.1730e-04
Loss = 3.1552e-01, PNorm = 63.7119, GNorm = 1.9283, lr_0 = 1.1722e-04
Loss = 3.6680e-01, PNorm = 63.7123, GNorm = 1.4559, lr_0 = 1.1714e-04
Loss = 3.4034e-01, PNorm = 63.7147, GNorm = 1.9022, lr_0 = 1.1706e-04
Loss = 3.1576e-01, PNorm = 63.7166, GNorm = 1.4200, lr_0 = 1.1698e-04
Loss = 3.3764e-01, PNorm = 63.7177, GNorm = 1.9831, lr_0 = 1.1690e-04
Loss = 3.5539e-01, PNorm = 63.7177, GNorm = 1.4074, lr_0 = 1.1682e-04
Loss = 3.4898e-01, PNorm = 63.7196, GNorm = 1.3542, lr_0 = 1.1674e-04
Loss = 3.5445e-01, PNorm = 63.7204, GNorm = 1.8115, lr_0 = 1.1666e-04
Loss = 3.4051e-01, PNorm = 63.7211, GNorm = 1.1101, lr_0 = 1.1658e-04
Loss = 3.6978e-01, PNorm = 63.7233, GNorm = 1.9777, lr_0 = 1.1650e-04
Loss = 3.3286e-01, PNorm = 63.7260, GNorm = 1.0512, lr_0 = 1.1642e-04
Loss = 3.4359e-01, PNorm = 63.7253, GNorm = 1.8023, lr_0 = 1.1634e-04
Loss = 3.0744e-01, PNorm = 63.7245, GNorm = 1.6654, lr_0 = 1.1626e-04
Loss = 3.2182e-01, PNorm = 63.7258, GNorm = 1.4606, lr_0 = 1.1618e-04
Loss = 3.4540e-01, PNorm = 63.7265, GNorm = 1.1964, lr_0 = 1.1611e-04
Loss = 3.4435e-01, PNorm = 63.7265, GNorm = 2.5180, lr_0 = 1.1603e-04
Loss = 3.4006e-01, PNorm = 63.7286, GNorm = 1.6283, lr_0 = 1.1595e-04
Loss = 3.4702e-01, PNorm = 63.7326, GNorm = 1.1476, lr_0 = 1.1587e-04
Loss = 2.9169e-01, PNorm = 63.7330, GNorm = 0.9959, lr_0 = 1.1579e-04
Loss = 4.2585e-01, PNorm = 63.7324, GNorm = 1.7511, lr_0 = 1.1571e-04
Loss = 3.3647e-01, PNorm = 63.7327, GNorm = 1.1588, lr_0 = 1.1563e-04
Loss = 3.8357e-01, PNorm = 63.7341, GNorm = 2.0876, lr_0 = 1.1555e-04
Loss = 3.4836e-01, PNorm = 63.7352, GNorm = 1.2373, lr_0 = 1.1547e-04
Loss = 3.7706e-01, PNorm = 63.7356, GNorm = 1.9711, lr_0 = 1.1539e-04
Loss = 2.9613e-01, PNorm = 63.7368, GNorm = 1.3360, lr_0 = 1.1531e-04
Loss = 3.7998e-01, PNorm = 63.7382, GNorm = 2.2018, lr_0 = 1.1523e-04
Loss = 3.3860e-01, PNorm = 63.7391, GNorm = 1.8832, lr_0 = 1.1515e-04
Loss = 3.2570e-01, PNorm = 63.7397, GNorm = 1.8517, lr_0 = 1.1508e-04
Loss = 3.3812e-01, PNorm = 63.7388, GNorm = 1.4857, lr_0 = 1.1500e-04
Loss = 3.2275e-01, PNorm = 63.7393, GNorm = 1.3617, lr_0 = 1.1492e-04
Loss = 3.8691e-01, PNorm = 63.7408, GNorm = 1.4301, lr_0 = 1.1484e-04
Loss = 3.6395e-01, PNorm = 63.7429, GNorm = 1.4467, lr_0 = 1.1476e-04
Loss = 3.1497e-01, PNorm = 63.7449, GNorm = 1.3829, lr_0 = 1.1468e-04
Loss = 3.2382e-01, PNorm = 63.7455, GNorm = 1.0832, lr_0 = 1.1460e-04
Loss = 3.6291e-01, PNorm = 63.7443, GNorm = 1.3671, lr_0 = 1.1452e-04
Loss = 3.5731e-01, PNorm = 63.7457, GNorm = 1.4821, lr_0 = 1.1445e-04
Loss = 3.5282e-01, PNorm = 63.7474, GNorm = 1.3637, lr_0 = 1.1437e-04
Loss = 3.3346e-01, PNorm = 63.7489, GNorm = 1.3026, lr_0 = 1.1429e-04
Loss = 3.6001e-01, PNorm = 63.7507, GNorm = 1.5796, lr_0 = 1.1421e-04
Loss = 3.2991e-01, PNorm = 63.7528, GNorm = 2.0784, lr_0 = 1.1413e-04
Loss = 3.4348e-01, PNorm = 63.7537, GNorm = 1.9539, lr_0 = 1.1405e-04
Loss = 3.7210e-01, PNorm = 63.7549, GNorm = 1.6746, lr_0 = 1.1398e-04
Loss = 2.9670e-01, PNorm = 63.7563, GNorm = 1.6284, lr_0 = 1.1390e-04
Loss = 3.4487e-01, PNorm = 63.7560, GNorm = 1.1690, lr_0 = 1.1382e-04
Loss = 3.4127e-01, PNorm = 63.7567, GNorm = 1.2551, lr_0 = 1.1374e-04
Loss = 3.1757e-01, PNorm = 63.7575, GNorm = 1.3869, lr_0 = 1.1366e-04
Loss = 3.1382e-01, PNorm = 63.7580, GNorm = 1.3594, lr_0 = 1.1359e-04
Loss = 3.4494e-01, PNorm = 63.7592, GNorm = 1.4494, lr_0 = 1.1351e-04
Loss = 3.0359e-01, PNorm = 63.7617, GNorm = 1.1152, lr_0 = 1.1343e-04
Loss = 3.2708e-01, PNorm = 63.7634, GNorm = 1.6023, lr_0 = 1.1335e-04
Loss = 3.7919e-01, PNorm = 63.7649, GNorm = 1.7080, lr_0 = 1.1328e-04
Loss = 4.1394e-01, PNorm = 63.7650, GNorm = 1.6337, lr_0 = 1.1320e-04
Loss = 3.3198e-01, PNorm = 63.7656, GNorm = 1.7112, lr_0 = 1.1312e-04
Loss = 3.6050e-01, PNorm = 63.7667, GNorm = 1.1950, lr_0 = 1.1304e-04
Loss = 3.0801e-01, PNorm = 63.7672, GNorm = 1.2249, lr_0 = 1.1297e-04
Loss = 3.6804e-01, PNorm = 63.7685, GNorm = 1.9971, lr_0 = 1.1289e-04
Loss = 3.4631e-01, PNorm = 63.7703, GNorm = 1.8592, lr_0 = 1.1281e-04
Loss = 3.1942e-01, PNorm = 63.7720, GNorm = 1.3344, lr_0 = 1.1273e-04
Loss = 3.0729e-01, PNorm = 63.7715, GNorm = 0.9991, lr_0 = 1.1266e-04
Loss = 3.2028e-01, PNorm = 63.7724, GNorm = 1.0584, lr_0 = 1.1258e-04
Loss = 3.3703e-01, PNorm = 63.7741, GNorm = 1.1951, lr_0 = 1.1250e-04
Loss = 3.1963e-01, PNorm = 63.7741, GNorm = 1.6024, lr_0 = 1.1243e-04
Loss = 2.9341e-01, PNorm = 63.7737, GNorm = 1.4320, lr_0 = 1.1235e-04
Loss = 3.3834e-01, PNorm = 63.7742, GNorm = 1.5046, lr_0 = 1.1227e-04
Loss = 3.4102e-01, PNorm = 63.7739, GNorm = 1.5802, lr_0 = 1.1219e-04
Loss = 3.3768e-01, PNorm = 63.7738, GNorm = 1.3597, lr_0 = 1.1212e-04
Loss = 2.9371e-01, PNorm = 63.7751, GNorm = 1.2424, lr_0 = 1.1204e-04
Loss = 3.2324e-01, PNorm = 63.7762, GNorm = 1.5593, lr_0 = 1.1196e-04
Loss = 3.3353e-01, PNorm = 63.7785, GNorm = 1.4837, lr_0 = 1.1189e-04
Loss = 3.6546e-01, PNorm = 63.7806, GNorm = 1.3013, lr_0 = 1.1181e-04
Loss = 4.4700e-01, PNorm = 63.7814, GNorm = 1.5908, lr_0 = 1.1173e-04
Loss = 3.3360e-01, PNorm = 63.7830, GNorm = 1.7478, lr_0 = 1.1166e-04
Loss = 3.6566e-01, PNorm = 63.7838, GNorm = 1.2621, lr_0 = 1.1158e-04
Loss = 3.6293e-01, PNorm = 63.7837, GNorm = 1.4260, lr_0 = 1.1150e-04
Loss = 3.2802e-01, PNorm = 63.7824, GNorm = 1.5802, lr_0 = 1.1143e-04
Loss = 3.5010e-01, PNorm = 63.7846, GNorm = 1.7406, lr_0 = 1.1135e-04
Loss = 3.2757e-01, PNorm = 63.7859, GNorm = 1.1876, lr_0 = 1.1128e-04
Loss = 3.7098e-01, PNorm = 63.7864, GNorm = 1.4004, lr_0 = 1.1120e-04
Loss = 3.4795e-01, PNorm = 63.7871, GNorm = 1.1720, lr_0 = 1.1112e-04
Loss = 3.3732e-01, PNorm = 63.7885, GNorm = 1.7397, lr_0 = 1.1105e-04
Loss = 3.7036e-01, PNorm = 63.7902, GNorm = 1.4056, lr_0 = 1.1097e-04
Loss = 3.7558e-01, PNorm = 63.7912, GNorm = 1.3883, lr_0 = 1.1089e-04
Loss = 3.4724e-01, PNorm = 63.7914, GNorm = 1.5904, lr_0 = 1.1082e-04
Loss = 3.4118e-01, PNorm = 63.7923, GNorm = 1.5758, lr_0 = 1.1074e-04
Loss = 3.0592e-01, PNorm = 63.7946, GNorm = 1.4008, lr_0 = 1.1067e-04
Loss = 3.4915e-01, PNorm = 63.7961, GNorm = 1.3342, lr_0 = 1.1059e-04
Loss = 3.9563e-01, PNorm = 63.7939, GNorm = 1.1865, lr_0 = 1.1052e-04
Loss = 4.0196e-01, PNorm = 63.7937, GNorm = 1.7228, lr_0 = 1.1044e-04
Loss = 3.2245e-01, PNorm = 63.7961, GNorm = 1.1557, lr_0 = 1.1036e-04
Loss = 2.9066e-01, PNorm = 63.7960, GNorm = 1.4551, lr_0 = 1.1029e-04
Loss = 3.4055e-01, PNorm = 63.7958, GNorm = 1.4377, lr_0 = 1.1021e-04
Loss = 3.2728e-01, PNorm = 63.7971, GNorm = 1.6495, lr_0 = 1.1014e-04
Loss = 3.5993e-01, PNorm = 63.7985, GNorm = 1.9127, lr_0 = 1.1006e-04
Loss = 3.5569e-01, PNorm = 63.7999, GNorm = 1.3062, lr_0 = 1.0999e-04
Loss = 3.1778e-01, PNorm = 63.7993, GNorm = 1.4744, lr_0 = 1.0991e-04
Loss = 3.4281e-01, PNorm = 63.7993, GNorm = 2.4191, lr_0 = 1.0984e-04
Loss = 3.6836e-01, PNorm = 63.8030, GNorm = 1.4533, lr_0 = 1.0976e-04
Loss = 3.4927e-01, PNorm = 63.8046, GNorm = 1.7506, lr_0 = 1.0969e-04
Loss = 3.4129e-01, PNorm = 63.8043, GNorm = 1.2792, lr_0 = 1.0961e-04
Loss = 4.3747e-01, PNorm = 63.8054, GNorm = 2.1734, lr_0 = 1.0954e-04
Loss = 3.4284e-01, PNorm = 63.8065, GNorm = 1.4054, lr_0 = 1.0946e-04
Loss = 2.9057e-01, PNorm = 63.8074, GNorm = 1.2544, lr_0 = 1.0939e-04
Loss = 3.2020e-01, PNorm = 63.8081, GNorm = 1.1329, lr_0 = 1.0931e-04
Loss = 3.0387e-01, PNorm = 63.8091, GNorm = 1.1793, lr_0 = 1.0924e-04
Loss = 4.1799e-01, PNorm = 63.8106, GNorm = 1.4017, lr_0 = 1.0916e-04
Loss = 3.4493e-01, PNorm = 63.8101, GNorm = 1.5568, lr_0 = 1.0909e-04
Loss = 3.4600e-01, PNorm = 63.8109, GNorm = 1.2925, lr_0 = 1.0901e-04
Loss = 3.3987e-01, PNorm = 63.8124, GNorm = 1.5391, lr_0 = 1.0894e-04
Loss = 3.8961e-01, PNorm = 63.8128, GNorm = 2.0213, lr_0 = 1.0886e-04
Loss = 3.8183e-01, PNorm = 63.8121, GNorm = 1.3475, lr_0 = 1.0879e-04
Loss = 3.3650e-01, PNorm = 63.8128, GNorm = 1.3890, lr_0 = 1.0871e-04
Loss = 3.4040e-01, PNorm = 63.8154, GNorm = 1.6035, lr_0 = 1.0864e-04
Loss = 3.8579e-01, PNorm = 63.8147, GNorm = 1.3996, lr_0 = 1.0856e-04
Validation mae = 0.110917
Epoch 29
Loss = 3.4542e-01, PNorm = 63.8150, GNorm = 1.8014, lr_0 = 1.0849e-04
Loss = 3.5830e-01, PNorm = 63.8171, GNorm = 1.1547, lr_0 = 1.0841e-04
Loss = 3.2100e-01, PNorm = 63.8182, GNorm = 1.2711, lr_0 = 1.0834e-04
Loss = 3.4447e-01, PNorm = 63.8180, GNorm = 1.3755, lr_0 = 1.0827e-04
Loss = 3.5408e-01, PNorm = 63.8185, GNorm = 1.2115, lr_0 = 1.0819e-04
Loss = 3.3482e-01, PNorm = 63.8204, GNorm = 1.1528, lr_0 = 1.0812e-04
Loss = 3.2733e-01, PNorm = 63.8224, GNorm = 1.7132, lr_0 = 1.0804e-04
Loss = 3.3794e-01, PNorm = 63.8231, GNorm = 1.1320, lr_0 = 1.0797e-04
Loss = 2.6501e-01, PNorm = 63.8248, GNorm = 1.5668, lr_0 = 1.0790e-04
Loss = 2.9981e-01, PNorm = 63.8264, GNorm = 1.2805, lr_0 = 1.0782e-04
Loss = 3.9297e-01, PNorm = 63.8271, GNorm = 1.6536, lr_0 = 1.0775e-04
Loss = 3.3925e-01, PNorm = 63.8279, GNorm = 1.6671, lr_0 = 1.0767e-04
Loss = 3.2503e-01, PNorm = 63.8299, GNorm = 1.2584, lr_0 = 1.0760e-04
Loss = 3.4188e-01, PNorm = 63.8314, GNorm = 1.6209, lr_0 = 1.0753e-04
Loss = 3.3356e-01, PNorm = 63.8308, GNorm = 1.5525, lr_0 = 1.0745e-04
Loss = 3.0114e-01, PNorm = 63.8307, GNorm = 1.0081, lr_0 = 1.0738e-04
Loss = 3.5627e-01, PNorm = 63.8317, GNorm = 2.0653, lr_0 = 1.0731e-04
Loss = 3.5542e-01, PNorm = 63.8313, GNorm = 1.6198, lr_0 = 1.0723e-04
Loss = 3.7961e-01, PNorm = 63.8308, GNorm = 1.7657, lr_0 = 1.0716e-04
Loss = 3.5371e-01, PNorm = 63.8313, GNorm = 1.4800, lr_0 = 1.0709e-04
Loss = 3.2988e-01, PNorm = 63.8333, GNorm = 1.0614, lr_0 = 1.0701e-04
Loss = 3.1644e-01, PNorm = 63.8349, GNorm = 1.3226, lr_0 = 1.0694e-04
Loss = 3.6442e-01, PNorm = 63.8350, GNorm = 1.1955, lr_0 = 1.0687e-04
Loss = 3.5188e-01, PNorm = 63.8350, GNorm = 1.2930, lr_0 = 1.0679e-04
Loss = 2.9235e-01, PNorm = 63.8362, GNorm = 1.9827, lr_0 = 1.0672e-04
Loss = 3.2216e-01, PNorm = 63.8377, GNorm = 1.5886, lr_0 = 1.0665e-04
Loss = 3.3890e-01, PNorm = 63.8375, GNorm = 1.7001, lr_0 = 1.0657e-04
Loss = 3.6351e-01, PNorm = 63.8379, GNorm = 1.1564, lr_0 = 1.0650e-04
Loss = 3.4680e-01, PNorm = 63.8384, GNorm = 1.4009, lr_0 = 1.0643e-04
Loss = 3.7259e-01, PNorm = 63.8389, GNorm = 1.3354, lr_0 = 1.0635e-04
Loss = 3.4556e-01, PNorm = 63.8396, GNorm = 1.3528, lr_0 = 1.0628e-04
Loss = 3.4318e-01, PNorm = 63.8405, GNorm = 1.3415, lr_0 = 1.0621e-04
Loss = 2.9627e-01, PNorm = 63.8400, GNorm = 1.5486, lr_0 = 1.0614e-04
Loss = 3.2183e-01, PNorm = 63.8414, GNorm = 1.4388, lr_0 = 1.0606e-04
Loss = 3.2041e-01, PNorm = 63.8437, GNorm = 1.2548, lr_0 = 1.0599e-04
Loss = 3.5347e-01, PNorm = 63.8440, GNorm = 1.8285, lr_0 = 1.0592e-04
Loss = 3.2450e-01, PNorm = 63.8445, GNorm = 1.2922, lr_0 = 1.0585e-04
Loss = 3.5204e-01, PNorm = 63.8438, GNorm = 1.6565, lr_0 = 1.0577e-04
Loss = 3.2709e-01, PNorm = 63.8446, GNorm = 1.7145, lr_0 = 1.0570e-04
Loss = 2.9884e-01, PNorm = 63.8466, GNorm = 1.8567, lr_0 = 1.0563e-04
Loss = 3.2141e-01, PNorm = 63.8475, GNorm = 1.0466, lr_0 = 1.0556e-04
Loss = 3.5006e-01, PNorm = 63.8489, GNorm = 1.2418, lr_0 = 1.0548e-04
Loss = 2.9720e-01, PNorm = 63.8503, GNorm = 1.4132, lr_0 = 1.0541e-04
Loss = 3.2759e-01, PNorm = 63.8511, GNorm = 1.7907, lr_0 = 1.0534e-04
Loss = 3.8019e-01, PNorm = 63.8510, GNorm = 1.3827, lr_0 = 1.0527e-04
Loss = 3.2740e-01, PNorm = 63.8506, GNorm = 1.6933, lr_0 = 1.0519e-04
Loss = 3.5049e-01, PNorm = 63.8499, GNorm = 1.4047, lr_0 = 1.0512e-04
Loss = 3.7541e-01, PNorm = 63.8513, GNorm = 1.5733, lr_0 = 1.0505e-04
Loss = 3.2324e-01, PNorm = 63.8517, GNorm = 1.7478, lr_0 = 1.0498e-04
Loss = 3.5638e-01, PNorm = 63.8518, GNorm = 1.2374, lr_0 = 1.0491e-04
Loss = 3.1914e-01, PNorm = 63.8523, GNorm = 1.0631, lr_0 = 1.0483e-04
Loss = 3.0467e-01, PNorm = 63.8521, GNorm = 1.4277, lr_0 = 1.0476e-04
Loss = 3.3081e-01, PNorm = 63.8525, GNorm = 1.1819, lr_0 = 1.0469e-04
Loss = 3.4755e-01, PNorm = 63.8550, GNorm = 1.2036, lr_0 = 1.0462e-04
Loss = 3.5375e-01, PNorm = 63.8550, GNorm = 1.3100, lr_0 = 1.0455e-04
Loss = 3.3900e-01, PNorm = 63.8547, GNorm = 1.5511, lr_0 = 1.0448e-04
Loss = 2.9699e-01, PNorm = 63.8559, GNorm = 1.5251, lr_0 = 1.0440e-04
Loss = 3.5417e-01, PNorm = 63.8570, GNorm = 1.3388, lr_0 = 1.0433e-04
Loss = 3.4605e-01, PNorm = 63.8563, GNorm = 1.6427, lr_0 = 1.0426e-04
Loss = 3.0438e-01, PNorm = 63.8579, GNorm = 1.2644, lr_0 = 1.0419e-04
Loss = 3.6915e-01, PNorm = 63.8593, GNorm = 1.2651, lr_0 = 1.0412e-04
Loss = 3.6433e-01, PNorm = 63.8594, GNorm = 1.6780, lr_0 = 1.0405e-04
Loss = 3.0633e-01, PNorm = 63.8608, GNorm = 1.4671, lr_0 = 1.0398e-04
Loss = 3.6860e-01, PNorm = 63.8626, GNorm = 1.5420, lr_0 = 1.0391e-04
Loss = 4.1471e-01, PNorm = 63.8629, GNorm = 1.6174, lr_0 = 1.0383e-04
Loss = 3.3413e-01, PNorm = 63.8622, GNorm = 1.1875, lr_0 = 1.0376e-04
Loss = 3.2932e-01, PNorm = 63.8638, GNorm = 1.6384, lr_0 = 1.0369e-04
Loss = 3.5586e-01, PNorm = 63.8654, GNorm = 2.0940, lr_0 = 1.0362e-04
Loss = 3.6932e-01, PNorm = 63.8685, GNorm = 1.3163, lr_0 = 1.0355e-04
Loss = 3.5159e-01, PNorm = 63.8706, GNorm = 1.7286, lr_0 = 1.0348e-04
Loss = 3.3757e-01, PNorm = 63.8709, GNorm = 1.2825, lr_0 = 1.0341e-04
Loss = 3.4637e-01, PNorm = 63.8705, GNorm = 2.0082, lr_0 = 1.0334e-04
Loss = 3.7037e-01, PNorm = 63.8712, GNorm = 2.0562, lr_0 = 1.0327e-04
Loss = 3.3106e-01, PNorm = 63.8727, GNorm = 1.6147, lr_0 = 1.0320e-04
Loss = 3.1750e-01, PNorm = 63.8744, GNorm = 1.6434, lr_0 = 1.0312e-04
Loss = 3.9554e-01, PNorm = 63.8754, GNorm = 2.0809, lr_0 = 1.0305e-04
Loss = 3.8697e-01, PNorm = 63.8752, GNorm = 1.7770, lr_0 = 1.0298e-04
Loss = 3.3463e-01, PNorm = 63.8759, GNorm = 1.9965, lr_0 = 1.0291e-04
Loss = 3.5576e-01, PNorm = 63.8749, GNorm = 1.2683, lr_0 = 1.0284e-04
Loss = 3.2507e-01, PNorm = 63.8747, GNorm = 1.4935, lr_0 = 1.0277e-04
Loss = 3.3560e-01, PNorm = 63.8754, GNorm = 1.2939, lr_0 = 1.0270e-04
Loss = 3.9923e-01, PNorm = 63.8776, GNorm = 1.4952, lr_0 = 1.0263e-04
Loss = 3.2773e-01, PNorm = 63.8805, GNorm = 1.4263, lr_0 = 1.0256e-04
Loss = 3.6254e-01, PNorm = 63.8829, GNorm = 1.3135, lr_0 = 1.0249e-04
Loss = 3.6726e-01, PNorm = 63.8842, GNorm = 1.3618, lr_0 = 1.0242e-04
Loss = 3.1615e-01, PNorm = 63.8852, GNorm = 1.3259, lr_0 = 1.0235e-04
Loss = 3.8939e-01, PNorm = 63.8867, GNorm = 2.1749, lr_0 = 1.0228e-04
Loss = 3.4132e-01, PNorm = 63.8891, GNorm = 1.4115, lr_0 = 1.0221e-04
Loss = 3.3975e-01, PNorm = 63.8903, GNorm = 1.2121, lr_0 = 1.0214e-04
Loss = 4.4790e-01, PNorm = 63.8902, GNorm = 1.8362, lr_0 = 1.0207e-04
Loss = 3.4955e-01, PNorm = 63.8898, GNorm = 1.5178, lr_0 = 1.0200e-04
Loss = 3.6139e-01, PNorm = 63.8908, GNorm = 1.1824, lr_0 = 1.0193e-04
Loss = 3.6073e-01, PNorm = 63.8917, GNorm = 2.4586, lr_0 = 1.0186e-04
Loss = 4.0277e-01, PNorm = 63.8933, GNorm = 1.7221, lr_0 = 1.0179e-04
Loss = 3.3887e-01, PNorm = 63.8962, GNorm = 1.3637, lr_0 = 1.0172e-04
Loss = 3.4035e-01, PNorm = 63.8990, GNorm = 1.4413, lr_0 = 1.0165e-04
Loss = 3.3873e-01, PNorm = 63.9001, GNorm = 1.9405, lr_0 = 1.0158e-04
Loss = 3.1438e-01, PNorm = 63.9024, GNorm = 1.7443, lr_0 = 1.0151e-04
Loss = 3.6801e-01, PNorm = 63.9036, GNorm = 1.7631, lr_0 = 1.0144e-04
Loss = 3.9101e-01, PNorm = 63.9027, GNorm = 1.7616, lr_0 = 1.0137e-04
Loss = 3.1355e-01, PNorm = 63.9031, GNorm = 1.9259, lr_0 = 1.0130e-04
Loss = 3.5233e-01, PNorm = 63.9044, GNorm = 1.6918, lr_0 = 1.0123e-04
Loss = 3.3189e-01, PNorm = 63.9053, GNorm = 1.4643, lr_0 = 1.0116e-04
Loss = 3.0964e-01, PNorm = 63.9058, GNorm = 2.0473, lr_0 = 1.0110e-04
Loss = 3.9995e-01, PNorm = 63.9064, GNorm = 1.6899, lr_0 = 1.0103e-04
Loss = 3.4184e-01, PNorm = 63.9088, GNorm = 1.2985, lr_0 = 1.0096e-04
Loss = 3.4301e-01, PNorm = 63.9099, GNorm = 1.3470, lr_0 = 1.0089e-04
Loss = 3.2510e-01, PNorm = 63.9105, GNorm = 1.6822, lr_0 = 1.0082e-04
Loss = 3.3023e-01, PNorm = 63.9114, GNorm = 1.5676, lr_0 = 1.0075e-04
Loss = 3.3567e-01, PNorm = 63.9114, GNorm = 1.8482, lr_0 = 1.0068e-04
Loss = 3.2318e-01, PNorm = 63.9112, GNorm = 1.4568, lr_0 = 1.0061e-04
Loss = 3.3223e-01, PNorm = 63.9129, GNorm = 1.0889, lr_0 = 1.0054e-04
Loss = 3.2733e-01, PNorm = 63.9132, GNorm = 1.2301, lr_0 = 1.0047e-04
Loss = 3.0805e-01, PNorm = 63.9132, GNorm = 1.6006, lr_0 = 1.0041e-04
Loss = 2.7869e-01, PNorm = 63.9136, GNorm = 1.7868, lr_0 = 1.0034e-04
Loss = 3.0720e-01, PNorm = 63.9154, GNorm = 1.3773, lr_0 = 1.0027e-04
Loss = 3.4415e-01, PNorm = 63.9164, GNorm = 1.4575, lr_0 = 1.0020e-04
Loss = 3.2199e-01, PNorm = 63.9156, GNorm = 1.1960, lr_0 = 1.0013e-04
Loss = 4.0550e-01, PNorm = 63.9151, GNorm = 1.4107, lr_0 = 1.0006e-04
Loss = 3.7925e-01, PNorm = 63.9159, GNorm = 1.6541, lr_0 = 1.0000e-04
Validation mae = 0.111320
Model 0 best validation mae = 0.110870 on epoch 27
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.111068
Ensemble test mae = 0.111068
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0396e+00, PNorm = 38.1704, GNorm = 4.2164, lr_0 = 1.0413e-04
Loss = 1.0389e+00, PNorm = 38.1698, GNorm = 2.2714, lr_0 = 1.0788e-04
Loss = 1.0382e+00, PNorm = 38.1697, GNorm = 3.0849, lr_0 = 1.1163e-04
Loss = 1.0019e+00, PNorm = 38.1699, GNorm = 2.1584, lr_0 = 1.1537e-04
Loss = 8.7347e-01, PNorm = 38.1703, GNorm = 5.0945, lr_0 = 1.1913e-04
Loss = 1.0459e+00, PNorm = 38.1710, GNorm = 2.4445, lr_0 = 1.2287e-04
Loss = 9.2836e-01, PNorm = 38.1715, GNorm = 3.9649, lr_0 = 1.2663e-04
Loss = 9.1257e-01, PNorm = 38.1722, GNorm = 3.5100, lr_0 = 1.3038e-04
Loss = 9.4017e-01, PNorm = 38.1734, GNorm = 2.6676, lr_0 = 1.3413e-04
Loss = 8.8074e-01, PNorm = 38.1744, GNorm = 2.0260, lr_0 = 1.3788e-04
Loss = 9.9588e-01, PNorm = 38.1756, GNorm = 3.6574, lr_0 = 1.4163e-04
Loss = 8.2025e-01, PNorm = 38.1771, GNorm = 2.4788, lr_0 = 1.4537e-04
Loss = 9.2968e-01, PNorm = 38.1787, GNorm = 2.8268, lr_0 = 1.4913e-04
Loss = 9.4354e-01, PNorm = 38.1810, GNorm = 3.2759, lr_0 = 1.5288e-04
Loss = 8.3274e-01, PNorm = 38.1830, GNorm = 3.0088, lr_0 = 1.5662e-04
Loss = 8.4827e-01, PNorm = 38.1849, GNorm = 7.0028, lr_0 = 1.6038e-04
Loss = 8.0429e-01, PNorm = 38.1869, GNorm = 3.5342, lr_0 = 1.6412e-04
Loss = 7.1369e-01, PNorm = 38.1896, GNorm = 2.0492, lr_0 = 1.6788e-04
Loss = 8.0666e-01, PNorm = 38.1918, GNorm = 8.6429, lr_0 = 1.7163e-04
Loss = 8.9221e-01, PNorm = 38.1945, GNorm = 3.4178, lr_0 = 1.7538e-04
Loss = 8.1646e-01, PNorm = 38.1974, GNorm = 6.8178, lr_0 = 1.7913e-04
Loss = 7.6448e-01, PNorm = 38.2008, GNorm = 4.0432, lr_0 = 1.8288e-04
Loss = 7.4785e-01, PNorm = 38.2041, GNorm = 2.0682, lr_0 = 1.8662e-04
Loss = 8.5083e-01, PNorm = 38.2073, GNorm = 2.5272, lr_0 = 1.9038e-04
Loss = 8.0649e-01, PNorm = 38.2091, GNorm = 3.9122, lr_0 = 1.9413e-04
Loss = 7.1752e-01, PNorm = 38.2123, GNorm = 1.5003, lr_0 = 1.9788e-04
Loss = 7.4651e-01, PNorm = 38.2157, GNorm = 2.8868, lr_0 = 2.0163e-04
Loss = 7.5592e-01, PNorm = 38.2184, GNorm = 8.6765, lr_0 = 2.0537e-04
Loss = 8.5226e-01, PNorm = 38.2198, GNorm = 4.4634, lr_0 = 2.0913e-04
Loss = 7.5396e-01, PNorm = 38.2229, GNorm = 7.3713, lr_0 = 2.1288e-04
Loss = 7.0013e-01, PNorm = 38.2248, GNorm = 5.9747, lr_0 = 2.1663e-04
Loss = 5.9784e-01, PNorm = 38.2272, GNorm = 4.1320, lr_0 = 2.2038e-04
Loss = 7.4577e-01, PNorm = 38.2295, GNorm = 4.2145, lr_0 = 2.2412e-04
Loss = 8.4267e-01, PNorm = 38.2312, GNorm = 6.1553, lr_0 = 2.2787e-04
Loss = 6.5927e-01, PNorm = 38.2339, GNorm = 4.3047, lr_0 = 2.3163e-04
Loss = 7.2356e-01, PNorm = 38.2366, GNorm = 5.4626, lr_0 = 2.3538e-04
Loss = 6.2657e-01, PNorm = 38.2400, GNorm = 2.1005, lr_0 = 2.3913e-04
Loss = 8.2114e-01, PNorm = 38.2428, GNorm = 4.2095, lr_0 = 2.4288e-04
Loss = 6.2148e-01, PNorm = 38.2469, GNorm = 2.0331, lr_0 = 2.4662e-04
Loss = 6.9357e-01, PNorm = 38.2489, GNorm = 13.5015, lr_0 = 2.5038e-04
Loss = 8.0433e-01, PNorm = 38.2482, GNorm = 8.6113, lr_0 = 2.5413e-04
Loss = 6.7550e-01, PNorm = 38.2510, GNorm = 4.1254, lr_0 = 2.5788e-04
Loss = 7.6182e-01, PNorm = 38.2558, GNorm = 1.5572, lr_0 = 2.6163e-04
Loss = 6.7382e-01, PNorm = 38.2595, GNorm = 2.2756, lr_0 = 2.6537e-04
Loss = 7.3028e-01, PNorm = 38.2636, GNorm = 2.2017, lr_0 = 2.6912e-04
Loss = 6.8226e-01, PNorm = 38.2650, GNorm = 1.7687, lr_0 = 2.7288e-04
Loss = 7.4328e-01, PNorm = 38.2692, GNorm = 3.0178, lr_0 = 2.7663e-04
Loss = 7.7249e-01, PNorm = 38.2735, GNorm = 5.1119, lr_0 = 2.8038e-04
Loss = 6.1886e-01, PNorm = 38.2796, GNorm = 5.4177, lr_0 = 2.8413e-04
Loss = 6.8407e-01, PNorm = 38.2833, GNorm = 1.6389, lr_0 = 2.8787e-04
Loss = 6.5644e-01, PNorm = 38.2856, GNorm = 1.4910, lr_0 = 2.9163e-04
Loss = 6.6533e-01, PNorm = 38.2889, GNorm = 1.9304, lr_0 = 2.9538e-04
Loss = 7.0822e-01, PNorm = 38.2929, GNorm = 2.2580, lr_0 = 2.9913e-04
Loss = 6.6382e-01, PNorm = 38.2996, GNorm = 2.5498, lr_0 = 3.0288e-04
Loss = 6.8259e-01, PNorm = 38.3051, GNorm = 3.6698, lr_0 = 3.0662e-04
Loss = 7.1532e-01, PNorm = 38.3101, GNorm = 4.6014, lr_0 = 3.1037e-04
Loss = 6.5554e-01, PNorm = 38.3131, GNorm = 1.2815, lr_0 = 3.1413e-04
Loss = 5.5686e-01, PNorm = 38.3156, GNorm = 2.3427, lr_0 = 3.1788e-04
Loss = 6.4336e-01, PNorm = 38.3193, GNorm = 5.0612, lr_0 = 3.2163e-04
Loss = 7.3625e-01, PNorm = 38.3232, GNorm = 1.2875, lr_0 = 3.2538e-04
Loss = 7.3427e-01, PNorm = 38.3262, GNorm = 4.4694, lr_0 = 3.2912e-04
Loss = 7.3007e-01, PNorm = 38.3304, GNorm = 3.4977, lr_0 = 3.3288e-04
Loss = 6.9840e-01, PNorm = 38.3357, GNorm = 1.0910, lr_0 = 3.3663e-04
Loss = 6.5817e-01, PNorm = 38.3388, GNorm = 1.9269, lr_0 = 3.4038e-04
Loss = 6.1974e-01, PNorm = 38.3459, GNorm = 1.3953, lr_0 = 3.4413e-04
Loss = 6.8952e-01, PNorm = 38.3528, GNorm = 10.8326, lr_0 = 3.4787e-04
Loss = 7.2044e-01, PNorm = 38.3554, GNorm = 1.6328, lr_0 = 3.5162e-04
Loss = 5.9269e-01, PNorm = 38.3596, GNorm = 2.5141, lr_0 = 3.5538e-04
Loss = 6.7023e-01, PNorm = 38.3663, GNorm = 2.5660, lr_0 = 3.5913e-04
Loss = 6.0185e-01, PNorm = 38.3702, GNorm = 3.2176, lr_0 = 3.6288e-04
Loss = 6.2597e-01, PNorm = 38.3770, GNorm = 8.7561, lr_0 = 3.6662e-04
Loss = 7.4762e-01, PNorm = 38.3825, GNorm = 1.6208, lr_0 = 3.7037e-04
Loss = 5.8767e-01, PNorm = 38.3907, GNorm = 1.8354, lr_0 = 3.7413e-04
Loss = 6.8157e-01, PNorm = 38.3975, GNorm = 2.2674, lr_0 = 3.7788e-04
Loss = 6.9467e-01, PNorm = 38.4003, GNorm = 1.9149, lr_0 = 3.8163e-04
Loss = 6.6732e-01, PNorm = 38.4055, GNorm = 11.2444, lr_0 = 3.8537e-04
Loss = 7.1860e-01, PNorm = 38.4131, GNorm = 2.2306, lr_0 = 3.8912e-04
Loss = 7.0676e-01, PNorm = 38.4185, GNorm = 1.7376, lr_0 = 3.9287e-04
Loss = 6.5190e-01, PNorm = 38.4242, GNorm = 4.7660, lr_0 = 3.9663e-04
Loss = 6.5088e-01, PNorm = 38.4315, GNorm = 2.4422, lr_0 = 4.0038e-04
Loss = 6.4426e-01, PNorm = 38.4378, GNorm = 2.3434, lr_0 = 4.0413e-04
Loss = 6.8683e-01, PNorm = 38.4424, GNorm = 1.1560, lr_0 = 4.0787e-04
Loss = 7.4646e-01, PNorm = 38.4475, GNorm = 4.9434, lr_0 = 4.1162e-04
Loss = 5.8262e-01, PNorm = 38.4541, GNorm = 4.0642, lr_0 = 4.1537e-04
Loss = 6.8700e-01, PNorm = 38.4618, GNorm = 4.3527, lr_0 = 4.1913e-04
Loss = 7.1256e-01, PNorm = 38.4708, GNorm = 4.1606, lr_0 = 4.2288e-04
Loss = 6.1951e-01, PNorm = 38.4795, GNorm = 2.6198, lr_0 = 4.2662e-04
Loss = 6.2814e-01, PNorm = 38.4845, GNorm = 1.1669, lr_0 = 4.3037e-04
Loss = 6.8047e-01, PNorm = 38.4882, GNorm = 6.7110, lr_0 = 4.3412e-04
Loss = 6.5782e-01, PNorm = 38.4967, GNorm = 3.7649, lr_0 = 4.3788e-04
Loss = 7.5248e-01, PNorm = 38.5089, GNorm = 2.6180, lr_0 = 4.4163e-04
Loss = 5.9056e-01, PNorm = 38.5164, GNorm = 5.8889, lr_0 = 4.4538e-04
Loss = 6.3109e-01, PNorm = 38.5273, GNorm = 4.3236, lr_0 = 4.4912e-04
Loss = 6.2712e-01, PNorm = 38.5339, GNorm = 3.3906, lr_0 = 4.5287e-04
Loss = 7.0325e-01, PNorm = 38.5423, GNorm = 6.8903, lr_0 = 4.5662e-04
Loss = 7.1683e-01, PNorm = 38.5516, GNorm = 4.2209, lr_0 = 4.6038e-04
Loss = 5.4572e-01, PNorm = 38.5607, GNorm = 1.6804, lr_0 = 4.6413e-04
Loss = 6.2292e-01, PNorm = 38.5674, GNorm = 3.9922, lr_0 = 4.6787e-04
Loss = 6.4585e-01, PNorm = 38.5723, GNorm = 5.7934, lr_0 = 4.7162e-04
Loss = 7.3773e-01, PNorm = 38.5792, GNorm = 1.2918, lr_0 = 4.7537e-04
Loss = 6.5134e-01, PNorm = 38.5863, GNorm = 2.2336, lr_0 = 4.7913e-04
Loss = 6.9636e-01, PNorm = 38.5976, GNorm = 1.4500, lr_0 = 4.8288e-04
Loss = 7.4782e-01, PNorm = 38.6109, GNorm = 3.8913, lr_0 = 4.8663e-04
Loss = 5.9568e-01, PNorm = 38.6155, GNorm = 1.4493, lr_0 = 4.9038e-04
Loss = 6.8667e-01, PNorm = 38.6284, GNorm = 5.2607, lr_0 = 4.9412e-04
Loss = 6.4148e-01, PNorm = 38.6372, GNorm = 2.1489, lr_0 = 4.9788e-04
Loss = 6.7993e-01, PNorm = 38.6493, GNorm = 1.3105, lr_0 = 5.0163e-04
Loss = 5.2866e-01, PNorm = 38.6596, GNorm = 2.4205, lr_0 = 5.0538e-04
Loss = 6.9277e-01, PNorm = 38.6659, GNorm = 1.2029, lr_0 = 5.0913e-04
Loss = 5.6911e-01, PNorm = 38.6775, GNorm = 5.6753, lr_0 = 5.1287e-04
Loss = 6.6505e-01, PNorm = 38.6866, GNorm = 2.0866, lr_0 = 5.1663e-04
Loss = 5.4713e-01, PNorm = 38.6905, GNorm = 1.5285, lr_0 = 5.2038e-04
Loss = 5.8088e-01, PNorm = 38.6994, GNorm = 2.5411, lr_0 = 5.2413e-04
Loss = 6.2442e-01, PNorm = 38.7071, GNorm = 1.5724, lr_0 = 5.2788e-04
Loss = 6.0436e-01, PNorm = 38.7182, GNorm = 4.6261, lr_0 = 5.3162e-04
Loss = 6.1175e-01, PNorm = 38.7234, GNorm = 1.7402, lr_0 = 5.3538e-04
Loss = 6.9561e-01, PNorm = 38.7281, GNorm = 4.2666, lr_0 = 5.3912e-04
Loss = 5.9495e-01, PNorm = 38.7396, GNorm = 2.3937, lr_0 = 5.4288e-04
Loss = 5.7351e-01, PNorm = 38.7471, GNorm = 2.3700, lr_0 = 5.4663e-04
Loss = 6.2904e-01, PNorm = 38.7586, GNorm = 4.7312, lr_0 = 5.5038e-04
Validation mae = 0.142731
Epoch 1
Loss = 6.1397e-01, PNorm = 38.7690, GNorm = 2.4844, lr_0 = 5.5413e-04
Loss = 6.1087e-01, PNorm = 38.7750, GNorm = 3.8780, lr_0 = 5.5787e-04
Loss = 6.1158e-01, PNorm = 38.7812, GNorm = 5.6461, lr_0 = 5.6163e-04
Loss = 6.6741e-01, PNorm = 38.7928, GNorm = 2.7725, lr_0 = 5.6538e-04
Loss = 6.4638e-01, PNorm = 38.8078, GNorm = 5.2648, lr_0 = 5.6913e-04
Loss = 7.1362e-01, PNorm = 38.8239, GNorm = 0.9127, lr_0 = 5.7288e-04
Loss = 6.6441e-01, PNorm = 38.8353, GNorm = 4.0158, lr_0 = 5.7662e-04
Loss = 6.9136e-01, PNorm = 38.8449, GNorm = 2.5079, lr_0 = 5.8038e-04
Loss = 5.2013e-01, PNorm = 38.8624, GNorm = 1.2005, lr_0 = 5.8413e-04
Loss = 6.8119e-01, PNorm = 38.8751, GNorm = 1.8654, lr_0 = 5.8788e-04
Loss = 7.3604e-01, PNorm = 38.8904, GNorm = 4.7346, lr_0 = 5.9163e-04
Loss = 6.1766e-01, PNorm = 38.9038, GNorm = 1.5117, lr_0 = 5.9538e-04
Loss = 6.3090e-01, PNorm = 38.9171, GNorm = 3.3079, lr_0 = 5.9913e-04
Loss = 6.0078e-01, PNorm = 38.9267, GNorm = 2.0392, lr_0 = 6.0288e-04
Loss = 5.7934e-01, PNorm = 38.9368, GNorm = 3.6479, lr_0 = 6.0663e-04
Loss = 6.0627e-01, PNorm = 38.9476, GNorm = 1.7199, lr_0 = 6.1038e-04
Loss = 5.1361e-01, PNorm = 38.9536, GNorm = 3.1432, lr_0 = 6.1413e-04
Loss = 5.2942e-01, PNorm = 38.9643, GNorm = 1.4563, lr_0 = 6.1788e-04
Loss = 6.3510e-01, PNorm = 38.9754, GNorm = 5.2285, lr_0 = 6.2163e-04
Loss = 6.5198e-01, PNorm = 38.9832, GNorm = 1.0054, lr_0 = 6.2538e-04
Loss = 6.1109e-01, PNorm = 38.9966, GNorm = 1.8460, lr_0 = 6.2913e-04
Loss = 5.7420e-01, PNorm = 39.0126, GNorm = 4.3852, lr_0 = 6.3288e-04
Loss = 6.7258e-01, PNorm = 39.0298, GNorm = 1.5933, lr_0 = 6.3663e-04
Loss = 5.3263e-01, PNorm = 39.0380, GNorm = 1.2547, lr_0 = 6.4038e-04
Loss = 5.8068e-01, PNorm = 39.0429, GNorm = 2.1277, lr_0 = 6.4413e-04
Loss = 5.2486e-01, PNorm = 39.0521, GNorm = 5.7835, lr_0 = 6.4788e-04
Loss = 6.0420e-01, PNorm = 39.0625, GNorm = 1.5767, lr_0 = 6.5163e-04
Loss = 5.7363e-01, PNorm = 39.0764, GNorm = 3.2570, lr_0 = 6.5538e-04
Loss = 5.0651e-01, PNorm = 39.0817, GNorm = 2.6919, lr_0 = 6.5913e-04
Loss = 5.6699e-01, PNorm = 39.0902, GNorm = 2.1028, lr_0 = 6.6288e-04
Loss = 6.6072e-01, PNorm = 39.1067, GNorm = 3.9547, lr_0 = 6.6663e-04
Loss = 6.0456e-01, PNorm = 39.1155, GNorm = 2.7748, lr_0 = 6.7038e-04
Loss = 5.8937e-01, PNorm = 39.1299, GNorm = 1.9901, lr_0 = 6.7413e-04
Loss = 5.6882e-01, PNorm = 39.1366, GNorm = 1.3786, lr_0 = 6.7788e-04
Loss = 6.0162e-01, PNorm = 39.1462, GNorm = 1.3086, lr_0 = 6.8163e-04
Loss = 5.5536e-01, PNorm = 39.1653, GNorm = 1.3458, lr_0 = 6.8538e-04
Loss = 5.6287e-01, PNorm = 39.1774, GNorm = 2.0451, lr_0 = 6.8913e-04
Loss = 6.3114e-01, PNorm = 39.1861, GNorm = 2.3279, lr_0 = 6.9288e-04
Loss = 5.7313e-01, PNorm = 39.2010, GNorm = 1.8804, lr_0 = 6.9663e-04
Loss = 6.5694e-01, PNorm = 39.2091, GNorm = 2.4296, lr_0 = 7.0038e-04
Loss = 6.3771e-01, PNorm = 39.2221, GNorm = 1.7772, lr_0 = 7.0413e-04
Loss = 6.2204e-01, PNorm = 39.2315, GNorm = 4.2606, lr_0 = 7.0788e-04
Loss = 5.4316e-01, PNorm = 39.2464, GNorm = 3.7378, lr_0 = 7.1163e-04
Loss = 6.0812e-01, PNorm = 39.2533, GNorm = 1.7814, lr_0 = 7.1538e-04
Loss = 6.6570e-01, PNorm = 39.2716, GNorm = 1.2138, lr_0 = 7.1913e-04
Loss = 6.1256e-01, PNorm = 39.2922, GNorm = 4.9500, lr_0 = 7.2288e-04
Loss = 5.0855e-01, PNorm = 39.3096, GNorm = 1.2515, lr_0 = 7.2663e-04
Loss = 6.6366e-01, PNorm = 39.3133, GNorm = 1.4528, lr_0 = 7.3038e-04
Loss = 6.2768e-01, PNorm = 39.3276, GNorm = 4.1414, lr_0 = 7.3413e-04
Loss = 6.8142e-01, PNorm = 39.3493, GNorm = 1.9560, lr_0 = 7.3788e-04
Loss = 6.3580e-01, PNorm = 39.3636, GNorm = 1.0654, lr_0 = 7.4163e-04
Loss = 6.1006e-01, PNorm = 39.3757, GNorm = 1.1740, lr_0 = 7.4538e-04
Loss = 5.5719e-01, PNorm = 39.3858, GNorm = 2.2165, lr_0 = 7.4913e-04
Loss = 6.1748e-01, PNorm = 39.3981, GNorm = 1.6086, lr_0 = 7.5288e-04
Loss = 5.7008e-01, PNorm = 39.4152, GNorm = 1.0877, lr_0 = 7.5663e-04
Loss = 7.1421e-01, PNorm = 39.4337, GNorm = 3.0155, lr_0 = 7.6038e-04
Loss = 5.0763e-01, PNorm = 39.4514, GNorm = 2.2088, lr_0 = 7.6413e-04
Loss = 6.1730e-01, PNorm = 39.4714, GNorm = 3.9301, lr_0 = 7.6788e-04
Loss = 5.6956e-01, PNorm = 39.4859, GNorm = 2.6300, lr_0 = 7.7163e-04
Loss = 6.3335e-01, PNorm = 39.4965, GNorm = 1.4538, lr_0 = 7.7538e-04
Loss = 6.1816e-01, PNorm = 39.5118, GNorm = 4.5872, lr_0 = 7.7913e-04
Loss = 6.1921e-01, PNorm = 39.5268, GNorm = 2.5316, lr_0 = 7.8288e-04
Loss = 6.1779e-01, PNorm = 39.5373, GNorm = 1.3509, lr_0 = 7.8663e-04
Loss = 5.5745e-01, PNorm = 39.5486, GNorm = 1.8900, lr_0 = 7.9038e-04
Loss = 6.0045e-01, PNorm = 39.5626, GNorm = 1.8169, lr_0 = 7.9413e-04
Loss = 6.2442e-01, PNorm = 39.5821, GNorm = 1.2091, lr_0 = 7.9788e-04
Loss = 5.3861e-01, PNorm = 39.5945, GNorm = 1.3741, lr_0 = 8.0163e-04
Loss = 4.8278e-01, PNorm = 39.6118, GNorm = 1.4602, lr_0 = 8.0538e-04
Loss = 5.7517e-01, PNorm = 39.6248, GNorm = 5.8110, lr_0 = 8.0913e-04
Loss = 6.0707e-01, PNorm = 39.6447, GNorm = 4.5083, lr_0 = 8.1288e-04
Loss = 5.3845e-01, PNorm = 39.6673, GNorm = 1.1006, lr_0 = 8.1663e-04
Loss = 6.2738e-01, PNorm = 39.6855, GNorm = 3.9255, lr_0 = 8.2038e-04
Loss = 5.9062e-01, PNorm = 39.7028, GNorm = 3.0297, lr_0 = 8.2413e-04
Loss = 5.6034e-01, PNorm = 39.7213, GNorm = 2.4862, lr_0 = 8.2788e-04
Loss = 5.5973e-01, PNorm = 39.7378, GNorm = 1.5287, lr_0 = 8.3163e-04
Loss = 6.5951e-01, PNorm = 39.7579, GNorm = 1.5061, lr_0 = 8.3538e-04
Loss = 5.5949e-01, PNorm = 39.7728, GNorm = 1.2225, lr_0 = 8.3913e-04
Loss = 5.8381e-01, PNorm = 39.7900, GNorm = 1.4026, lr_0 = 8.4288e-04
Loss = 6.3657e-01, PNorm = 39.8070, GNorm = 4.5708, lr_0 = 8.4663e-04
Loss = 5.2723e-01, PNorm = 39.8322, GNorm = 1.2339, lr_0 = 8.5038e-04
Loss = 5.9496e-01, PNorm = 39.8455, GNorm = 2.5354, lr_0 = 8.5413e-04
Loss = 6.2978e-01, PNorm = 39.8616, GNorm = 2.9456, lr_0 = 8.5788e-04
Loss = 6.4826e-01, PNorm = 39.8793, GNorm = 3.6561, lr_0 = 8.6163e-04
Loss = 5.6389e-01, PNorm = 39.8986, GNorm = 2.9880, lr_0 = 8.6538e-04
Loss = 5.6669e-01, PNorm = 39.9167, GNorm = 2.9234, lr_0 = 8.6913e-04
Loss = 5.8327e-01, PNorm = 39.9341, GNorm = 4.1437, lr_0 = 8.7288e-04
Loss = 6.0140e-01, PNorm = 39.9529, GNorm = 4.5126, lr_0 = 8.7663e-04
Loss = 6.4885e-01, PNorm = 39.9702, GNorm = 3.7058, lr_0 = 8.8038e-04
Loss = 5.8158e-01, PNorm = 39.9988, GNorm = 1.2847, lr_0 = 8.8413e-04
Loss = 6.2899e-01, PNorm = 40.0110, GNorm = 2.5010, lr_0 = 8.8788e-04
Loss = 6.4539e-01, PNorm = 40.0343, GNorm = 2.9422, lr_0 = 8.9163e-04
Loss = 5.9275e-01, PNorm = 40.0515, GNorm = 1.4297, lr_0 = 8.9538e-04
Loss = 5.6754e-01, PNorm = 40.0769, GNorm = 1.0527, lr_0 = 8.9913e-04
Loss = 5.9784e-01, PNorm = 40.0922, GNorm = 2.2827, lr_0 = 9.0288e-04
Loss = 5.1480e-01, PNorm = 40.1059, GNorm = 1.4638, lr_0 = 9.0663e-04
Loss = 5.8503e-01, PNorm = 40.1262, GNorm = 2.1185, lr_0 = 9.1038e-04
Loss = 6.0445e-01, PNorm = 40.1368, GNorm = 4.4315, lr_0 = 9.1413e-04
Loss = 5.8543e-01, PNorm = 40.1513, GNorm = 1.3344, lr_0 = 9.1788e-04
Loss = 5.4802e-01, PNorm = 40.1638, GNorm = 0.8820, lr_0 = 9.2163e-04
Loss = 5.8597e-01, PNorm = 40.1906, GNorm = 2.8354, lr_0 = 9.2538e-04
Loss = 5.7638e-01, PNorm = 40.2131, GNorm = 1.7373, lr_0 = 9.2913e-04
Loss = 5.5740e-01, PNorm = 40.2362, GNorm = 1.1678, lr_0 = 9.3288e-04
Loss = 5.2230e-01, PNorm = 40.2527, GNorm = 1.5838, lr_0 = 9.3663e-04
Loss = 6.0857e-01, PNorm = 40.2571, GNorm = 0.9590, lr_0 = 9.4038e-04
Loss = 6.0466e-01, PNorm = 40.2763, GNorm = 1.3968, lr_0 = 9.4413e-04
Loss = 5.0706e-01, PNorm = 40.2986, GNorm = 3.1666, lr_0 = 9.4788e-04
Loss = 6.1737e-01, PNorm = 40.3144, GNorm = 1.8251, lr_0 = 9.5163e-04
Loss = 5.7821e-01, PNorm = 40.3383, GNorm = 1.2197, lr_0 = 9.5538e-04
Loss = 5.4059e-01, PNorm = 40.3495, GNorm = 4.7353, lr_0 = 9.5913e-04
Loss = 5.7400e-01, PNorm = 40.3693, GNorm = 2.2631, lr_0 = 9.6288e-04
Loss = 6.2453e-01, PNorm = 40.3836, GNorm = 3.5532, lr_0 = 9.6663e-04
Loss = 5.8055e-01, PNorm = 40.4084, GNorm = 5.8321, lr_0 = 9.7038e-04
Loss = 8.0118e-01, PNorm = 40.4260, GNorm = 5.5356, lr_0 = 9.7413e-04
Loss = 6.2072e-01, PNorm = 40.4510, GNorm = 1.9664, lr_0 = 9.7788e-04
Loss = 5.4783e-01, PNorm = 40.4831, GNorm = 1.7433, lr_0 = 9.8163e-04
Loss = 4.8337e-01, PNorm = 40.5060, GNorm = 1.6749, lr_0 = 9.8537e-04
Loss = 5.7865e-01, PNorm = 40.5269, GNorm = 1.8269, lr_0 = 9.8912e-04
Loss = 5.4786e-01, PNorm = 40.5433, GNorm = 1.0174, lr_0 = 9.9288e-04
Loss = 5.5221e-01, PNorm = 40.5570, GNorm = 3.9792, lr_0 = 9.9663e-04
Loss = 6.9990e-01, PNorm = 40.5756, GNorm = 1.5579, lr_0 = 9.9993e-04
Validation mae = 0.136754
Epoch 2
Loss = 5.5781e-01, PNorm = 40.5988, GNorm = 1.2575, lr_0 = 9.9925e-04
Loss = 5.5240e-01, PNorm = 40.6137, GNorm = 1.1509, lr_0 = 9.9856e-04
Loss = 5.8937e-01, PNorm = 40.6319, GNorm = 6.1400, lr_0 = 9.9788e-04
Loss = 5.9843e-01, PNorm = 40.6388, GNorm = 1.9934, lr_0 = 9.9719e-04
Loss = 6.9295e-01, PNorm = 40.6640, GNorm = 1.3892, lr_0 = 9.9651e-04
Loss = 5.5495e-01, PNorm = 40.6976, GNorm = 2.3713, lr_0 = 9.9583e-04
Loss = 5.3196e-01, PNorm = 40.7182, GNorm = 3.2272, lr_0 = 9.9515e-04
Loss = 6.1185e-01, PNorm = 40.7404, GNorm = 0.9447, lr_0 = 9.9446e-04
Loss = 4.9040e-01, PNorm = 40.7686, GNorm = 1.7738, lr_0 = 9.9378e-04
Loss = 6.0388e-01, PNorm = 40.7890, GNorm = 2.4248, lr_0 = 9.9310e-04
Loss = 5.2330e-01, PNorm = 40.8079, GNorm = 0.8931, lr_0 = 9.9242e-04
Loss = 5.2244e-01, PNorm = 40.8373, GNorm = 1.3478, lr_0 = 9.9174e-04
Loss = 5.1898e-01, PNorm = 40.8614, GNorm = 1.0444, lr_0 = 9.9106e-04
Loss = 5.1363e-01, PNorm = 40.8827, GNorm = 3.1927, lr_0 = 9.9038e-04
Loss = 5.9735e-01, PNorm = 40.9060, GNorm = 5.3903, lr_0 = 9.8971e-04
Loss = 6.2150e-01, PNorm = 40.9246, GNorm = 1.2684, lr_0 = 9.8903e-04
Loss = 5.8396e-01, PNorm = 40.9572, GNorm = 2.4381, lr_0 = 9.8835e-04
Loss = 4.8140e-01, PNorm = 40.9803, GNorm = 1.2259, lr_0 = 9.8767e-04
Loss = 5.2142e-01, PNorm = 40.9976, GNorm = 3.2099, lr_0 = 9.8700e-04
Loss = 5.4835e-01, PNorm = 41.0193, GNorm = 1.8132, lr_0 = 9.8632e-04
Loss = 5.2252e-01, PNorm = 41.0321, GNorm = 1.4277, lr_0 = 9.8564e-04
Loss = 5.9236e-01, PNorm = 41.0348, GNorm = 1.6541, lr_0 = 9.8497e-04
Loss = 6.0208e-01, PNorm = 41.0542, GNorm = 1.1635, lr_0 = 9.8429e-04
Loss = 5.5291e-01, PNorm = 41.0707, GNorm = 2.7830, lr_0 = 9.8362e-04
Loss = 5.5125e-01, PNorm = 41.0842, GNorm = 1.2095, lr_0 = 9.8295e-04
Loss = 6.3170e-01, PNorm = 41.1062, GNorm = 3.4957, lr_0 = 9.8227e-04
Loss = 5.7774e-01, PNorm = 41.1344, GNorm = 1.4819, lr_0 = 9.8160e-04
Loss = 4.7403e-01, PNorm = 41.1671, GNorm = 1.1059, lr_0 = 9.8093e-04
Loss = 5.8322e-01, PNorm = 41.1813, GNorm = 3.4096, lr_0 = 9.8026e-04
Loss = 6.1772e-01, PNorm = 41.2056, GNorm = 0.9866, lr_0 = 9.7958e-04
Loss = 5.2126e-01, PNorm = 41.2332, GNorm = 1.4162, lr_0 = 9.7891e-04
Loss = 5.2329e-01, PNorm = 41.2574, GNorm = 1.4031, lr_0 = 9.7824e-04
Loss = 5.4021e-01, PNorm = 41.2744, GNorm = 2.1386, lr_0 = 9.7757e-04
Loss = 5.6732e-01, PNorm = 41.2859, GNorm = 1.4282, lr_0 = 9.7690e-04
Loss = 5.3089e-01, PNorm = 41.3042, GNorm = 0.8844, lr_0 = 9.7623e-04
Loss = 5.8293e-01, PNorm = 41.3215, GNorm = 1.2468, lr_0 = 9.7556e-04
Loss = 5.0472e-01, PNorm = 41.3410, GNorm = 1.3090, lr_0 = 9.7490e-04
Loss = 5.9624e-01, PNorm = 41.3665, GNorm = 1.4140, lr_0 = 9.7423e-04
Loss = 4.6886e-01, PNorm = 41.3948, GNorm = 1.0091, lr_0 = 9.7356e-04
Loss = 6.4925e-01, PNorm = 41.4206, GNorm = 1.6734, lr_0 = 9.7289e-04
Loss = 5.2446e-01, PNorm = 41.4340, GNorm = 0.9698, lr_0 = 9.7223e-04
Loss = 5.0441e-01, PNorm = 41.4440, GNorm = 2.5441, lr_0 = 9.7156e-04
Loss = 5.0170e-01, PNorm = 41.4596, GNorm = 0.8771, lr_0 = 9.7090e-04
Loss = 5.1681e-01, PNorm = 41.4837, GNorm = 2.3747, lr_0 = 9.7023e-04
Loss = 4.7949e-01, PNorm = 41.5002, GNorm = 1.3918, lr_0 = 9.6957e-04
Loss = 4.6250e-01, PNorm = 41.5216, GNorm = 1.5405, lr_0 = 9.6890e-04
Loss = 5.9125e-01, PNorm = 41.5459, GNorm = 2.4021, lr_0 = 9.6824e-04
Loss = 5.4101e-01, PNorm = 41.5820, GNorm = 2.1852, lr_0 = 9.6757e-04
Loss = 5.1725e-01, PNorm = 41.5941, GNorm = 1.3502, lr_0 = 9.6691e-04
Loss = 4.7267e-01, PNorm = 41.6084, GNorm = 1.6655, lr_0 = 9.6625e-04
Loss = 5.5507e-01, PNorm = 41.6295, GNorm = 1.9026, lr_0 = 9.6559e-04
Loss = 5.0144e-01, PNorm = 41.6484, GNorm = 1.6044, lr_0 = 9.6493e-04
Loss = 5.4156e-01, PNorm = 41.6682, GNorm = 1.5184, lr_0 = 9.6427e-04
Loss = 5.5246e-01, PNorm = 41.6889, GNorm = 1.5973, lr_0 = 9.6360e-04
Loss = 6.1696e-01, PNorm = 41.7112, GNorm = 3.6824, lr_0 = 9.6294e-04
Loss = 5.5170e-01, PNorm = 41.7310, GNorm = 1.1271, lr_0 = 9.6228e-04
Loss = 4.2831e-01, PNorm = 41.7516, GNorm = 1.8178, lr_0 = 9.6163e-04
Loss = 6.5151e-01, PNorm = 41.7664, GNorm = 1.3039, lr_0 = 9.6097e-04
Loss = 4.9765e-01, PNorm = 41.7824, GNorm = 1.1604, lr_0 = 9.6031e-04
Loss = 5.3127e-01, PNorm = 41.8000, GNorm = 1.9499, lr_0 = 9.5965e-04
Loss = 6.2644e-01, PNorm = 41.8256, GNorm = 1.2942, lr_0 = 9.5899e-04
Loss = 5.6467e-01, PNorm = 41.8411, GNorm = 1.5880, lr_0 = 9.5834e-04
Loss = 4.6010e-01, PNorm = 41.8698, GNorm = 1.0613, lr_0 = 9.5768e-04
Loss = 5.4791e-01, PNorm = 41.8863, GNorm = 2.2896, lr_0 = 9.5702e-04
Loss = 6.0685e-01, PNorm = 41.9089, GNorm = 1.8777, lr_0 = 9.5637e-04
Loss = 5.5112e-01, PNorm = 41.9332, GNorm = 1.4165, lr_0 = 9.5571e-04
Loss = 5.1644e-01, PNorm = 41.9385, GNorm = 0.9363, lr_0 = 9.5506e-04
Loss = 5.0713e-01, PNorm = 41.9537, GNorm = 1.1966, lr_0 = 9.5440e-04
Loss = 5.7396e-01, PNorm = 41.9732, GNorm = 6.4203, lr_0 = 9.5375e-04
Loss = 5.5493e-01, PNorm = 41.9957, GNorm = 2.2091, lr_0 = 9.5310e-04
Loss = 5.2968e-01, PNorm = 42.0174, GNorm = 1.1484, lr_0 = 9.5244e-04
Loss = 4.7859e-01, PNorm = 42.0300, GNorm = 1.4846, lr_0 = 9.5179e-04
Loss = 4.9773e-01, PNorm = 42.0538, GNorm = 1.2533, lr_0 = 9.5114e-04
Loss = 5.7893e-01, PNorm = 42.0777, GNorm = 3.6600, lr_0 = 9.5049e-04
Loss = 5.3428e-01, PNorm = 42.0901, GNorm = 2.9716, lr_0 = 9.4984e-04
Loss = 5.7519e-01, PNorm = 42.1044, GNorm = 1.5833, lr_0 = 9.4919e-04
Loss = 5.8946e-01, PNorm = 42.1270, GNorm = 1.5933, lr_0 = 9.4854e-04
Loss = 5.2676e-01, PNorm = 42.1564, GNorm = 1.2699, lr_0 = 9.4789e-04
Loss = 5.6938e-01, PNorm = 42.1856, GNorm = 1.6738, lr_0 = 9.4724e-04
Loss = 5.2575e-01, PNorm = 42.2035, GNorm = 2.4036, lr_0 = 9.4659e-04
Loss = 5.5792e-01, PNorm = 42.2231, GNorm = 1.8075, lr_0 = 9.4594e-04
Loss = 5.6136e-01, PNorm = 42.2334, GNorm = 1.0424, lr_0 = 9.4529e-04
Loss = 5.3152e-01, PNorm = 42.2544, GNorm = 1.6957, lr_0 = 9.4464e-04
Loss = 5.3553e-01, PNorm = 42.2700, GNorm = 2.6818, lr_0 = 9.4400e-04
Loss = 5.7135e-01, PNorm = 42.2877, GNorm = 2.4721, lr_0 = 9.4335e-04
Loss = 5.8564e-01, PNorm = 42.3118, GNorm = 4.7289, lr_0 = 9.4270e-04
Loss = 5.4825e-01, PNorm = 42.3230, GNorm = 2.3929, lr_0 = 9.4206e-04
Loss = 6.3073e-01, PNorm = 42.3409, GNorm = 1.8282, lr_0 = 9.4141e-04
Loss = 4.9498e-01, PNorm = 42.3628, GNorm = 1.2354, lr_0 = 9.4077e-04
Loss = 5.2620e-01, PNorm = 42.3867, GNorm = 2.8139, lr_0 = 9.4012e-04
Loss = 4.4760e-01, PNorm = 42.4010, GNorm = 1.6351, lr_0 = 9.3948e-04
Loss = 4.5403e-01, PNorm = 42.4097, GNorm = 0.7733, lr_0 = 9.3884e-04
Loss = 5.5482e-01, PNorm = 42.4207, GNorm = 1.2494, lr_0 = 9.3819e-04
Loss = 5.0412e-01, PNorm = 42.4351, GNorm = 1.2998, lr_0 = 9.3755e-04
Loss = 6.9976e-01, PNorm = 42.4551, GNorm = 3.1605, lr_0 = 9.3691e-04
Loss = 4.4649e-01, PNorm = 42.4725, GNorm = 2.8120, lr_0 = 9.3627e-04
Loss = 4.8275e-01, PNorm = 42.4919, GNorm = 1.2946, lr_0 = 9.3562e-04
Loss = 5.2468e-01, PNorm = 42.5165, GNorm = 2.9223, lr_0 = 9.3498e-04
Loss = 5.0535e-01, PNorm = 42.5219, GNorm = 2.0395, lr_0 = 9.3434e-04
Loss = 4.8324e-01, PNorm = 42.5332, GNorm = 1.2201, lr_0 = 9.3370e-04
Loss = 4.9330e-01, PNorm = 42.5516, GNorm = 1.2450, lr_0 = 9.3306e-04
Loss = 5.6645e-01, PNorm = 42.5700, GNorm = 2.6147, lr_0 = 9.3242e-04
Loss = 5.6240e-01, PNorm = 42.5927, GNorm = 2.3874, lr_0 = 9.3178e-04
Loss = 5.1525e-01, PNorm = 42.6183, GNorm = 1.2994, lr_0 = 9.3115e-04
Loss = 5.0874e-01, PNorm = 42.6405, GNorm = 1.7975, lr_0 = 9.3051e-04
Loss = 5.9035e-01, PNorm = 42.6630, GNorm = 4.7420, lr_0 = 9.2987e-04
Loss = 6.1443e-01, PNorm = 42.6854, GNorm = 4.3668, lr_0 = 9.2923e-04
Loss = 4.9758e-01, PNorm = 42.7036, GNorm = 1.1489, lr_0 = 9.2860e-04
Loss = 6.3285e-01, PNorm = 42.7271, GNorm = 0.8603, lr_0 = 9.2796e-04
Loss = 4.6294e-01, PNorm = 42.7505, GNorm = 2.3569, lr_0 = 9.2733e-04
Loss = 4.7007e-01, PNorm = 42.7821, GNorm = 4.1107, lr_0 = 9.2669e-04
Loss = 5.1774e-01, PNorm = 42.8011, GNorm = 1.5446, lr_0 = 9.2606e-04
Loss = 6.1054e-01, PNorm = 42.8228, GNorm = 1.5095, lr_0 = 9.2542e-04
Loss = 5.2642e-01, PNorm = 42.8460, GNorm = 1.4278, lr_0 = 9.2479e-04
Loss = 5.1488e-01, PNorm = 42.8728, GNorm = 2.2160, lr_0 = 9.2415e-04
Loss = 4.6222e-01, PNorm = 42.8901, GNorm = 2.0918, lr_0 = 9.2352e-04
Loss = 5.4694e-01, PNorm = 42.9051, GNorm = 1.7769, lr_0 = 9.2289e-04
Loss = 5.4930e-01, PNorm = 42.9274, GNorm = 1.7506, lr_0 = 9.2226e-04
Loss = 4.8785e-01, PNorm = 42.9489, GNorm = 0.9376, lr_0 = 9.2162e-04
Loss = 4.4593e-01, PNorm = 42.9653, GNorm = 1.7945, lr_0 = 9.2099e-04
Validation mae = 0.127836
Epoch 3
Loss = 5.4541e-01, PNorm = 42.9847, GNorm = 1.2418, lr_0 = 9.2036e-04
Loss = 5.5733e-01, PNorm = 43.0055, GNorm = 4.8993, lr_0 = 9.1973e-04
Loss = 5.3447e-01, PNorm = 43.0271, GNorm = 0.9189, lr_0 = 9.1910e-04
Loss = 4.6082e-01, PNorm = 43.0566, GNorm = 1.8903, lr_0 = 9.1847e-04
Loss = 5.2435e-01, PNorm = 43.0768, GNorm = 1.3915, lr_0 = 9.1784e-04
Loss = 4.8056e-01, PNorm = 43.0961, GNorm = 1.2478, lr_0 = 9.1721e-04
Loss = 4.9605e-01, PNorm = 43.1139, GNorm = 1.1396, lr_0 = 9.1658e-04
Loss = 5.2394e-01, PNorm = 43.1327, GNorm = 1.1805, lr_0 = 9.1596e-04
Loss = 5.2385e-01, PNorm = 43.1557, GNorm = 2.1834, lr_0 = 9.1533e-04
Loss = 5.0169e-01, PNorm = 43.1541, GNorm = 1.5976, lr_0 = 9.1470e-04
Loss = 5.4102e-01, PNorm = 43.1657, GNorm = 1.0501, lr_0 = 9.1408e-04
Loss = 4.8499e-01, PNorm = 43.1933, GNorm = 1.5790, lr_0 = 9.1345e-04
Loss = 4.6949e-01, PNorm = 43.2184, GNorm = 0.9204, lr_0 = 9.1282e-04
Loss = 5.1003e-01, PNorm = 43.2422, GNorm = 1.7869, lr_0 = 9.1220e-04
Loss = 5.0288e-01, PNorm = 43.2573, GNorm = 1.6466, lr_0 = 9.1157e-04
Loss = 4.9639e-01, PNorm = 43.2827, GNorm = 1.7258, lr_0 = 9.1095e-04
Loss = 5.1282e-01, PNorm = 43.3084, GNorm = 2.1588, lr_0 = 9.1032e-04
Loss = 5.8568e-01, PNorm = 43.3341, GNorm = 2.4532, lr_0 = 9.0970e-04
Loss = 5.2053e-01, PNorm = 43.3541, GNorm = 2.2123, lr_0 = 9.0908e-04
Loss = 5.2279e-01, PNorm = 43.3884, GNorm = 2.0100, lr_0 = 9.0846e-04
Loss = 5.5472e-01, PNorm = 43.4115, GNorm = 1.9742, lr_0 = 9.0783e-04
Loss = 5.4545e-01, PNorm = 43.4363, GNorm = 2.6320, lr_0 = 9.0721e-04
Loss = 4.8778e-01, PNorm = 43.4574, GNorm = 2.3292, lr_0 = 9.0659e-04
Loss = 4.5543e-01, PNorm = 43.4685, GNorm = 1.1519, lr_0 = 9.0597e-04
Loss = 5.5127e-01, PNorm = 43.4844, GNorm = 1.2391, lr_0 = 9.0535e-04
Loss = 5.3511e-01, PNorm = 43.5046, GNorm = 2.0741, lr_0 = 9.0473e-04
Loss = 6.3458e-01, PNorm = 43.5250, GNorm = 0.9010, lr_0 = 9.0411e-04
Loss = 5.4921e-01, PNorm = 43.5525, GNorm = 1.2271, lr_0 = 9.0349e-04
Loss = 4.9721e-01, PNorm = 43.5799, GNorm = 2.1020, lr_0 = 9.0287e-04
Loss = 5.1991e-01, PNorm = 43.6012, GNorm = 1.4182, lr_0 = 9.0225e-04
Loss = 4.7438e-01, PNorm = 43.6211, GNorm = 2.6040, lr_0 = 9.0163e-04
Loss = 5.2476e-01, PNorm = 43.6413, GNorm = 2.5531, lr_0 = 9.0102e-04
Loss = 5.5368e-01, PNorm = 43.6604, GNorm = 1.0559, lr_0 = 9.0040e-04
Loss = 4.8870e-01, PNorm = 43.6832, GNorm = 2.3372, lr_0 = 8.9978e-04
Loss = 4.7556e-01, PNorm = 43.6993, GNorm = 1.5858, lr_0 = 8.9916e-04
Loss = 5.2276e-01, PNorm = 43.7173, GNorm = 1.3730, lr_0 = 8.9855e-04
Loss = 5.4785e-01, PNorm = 43.7376, GNorm = 1.3426, lr_0 = 8.9793e-04
Loss = 4.8310e-01, PNorm = 43.7559, GNorm = 1.6019, lr_0 = 8.9732e-04
Loss = 4.9001e-01, PNorm = 43.7852, GNorm = 1.5607, lr_0 = 8.9670e-04
Loss = 5.5048e-01, PNorm = 43.8143, GNorm = 1.3567, lr_0 = 8.9609e-04
Loss = 5.2155e-01, PNorm = 43.8360, GNorm = 1.0331, lr_0 = 8.9548e-04
Loss = 5.0149e-01, PNorm = 43.8548, GNorm = 1.0332, lr_0 = 8.9486e-04
Loss = 5.2741e-01, PNorm = 43.8802, GNorm = 2.7968, lr_0 = 8.9425e-04
Loss = 5.0479e-01, PNorm = 43.9012, GNorm = 1.5327, lr_0 = 8.9364e-04
Loss = 5.1352e-01, PNorm = 43.9279, GNorm = 2.8051, lr_0 = 8.9302e-04
Loss = 5.1221e-01, PNorm = 43.9469, GNorm = 1.8574, lr_0 = 8.9241e-04
Loss = 5.1299e-01, PNorm = 43.9626, GNorm = 2.7016, lr_0 = 8.9180e-04
Loss = 5.4307e-01, PNorm = 43.9797, GNorm = 1.1609, lr_0 = 8.9119e-04
Loss = 5.3956e-01, PNorm = 43.9937, GNorm = 3.5882, lr_0 = 8.9058e-04
Loss = 6.3124e-01, PNorm = 44.0195, GNorm = 1.7126, lr_0 = 8.8997e-04
Loss = 4.7126e-01, PNorm = 44.0435, GNorm = 1.2080, lr_0 = 8.8936e-04
Loss = 4.7492e-01, PNorm = 44.0598, GNorm = 1.4377, lr_0 = 8.8875e-04
Loss = 5.1125e-01, PNorm = 44.0822, GNorm = 1.5278, lr_0 = 8.8814e-04
Loss = 4.8752e-01, PNorm = 44.1002, GNorm = 1.5500, lr_0 = 8.8753e-04
Loss = 4.8035e-01, PNorm = 44.1130, GNorm = 1.7554, lr_0 = 8.8693e-04
Loss = 4.8928e-01, PNorm = 44.1330, GNorm = 3.1877, lr_0 = 8.8632e-04
Loss = 5.6464e-01, PNorm = 44.1565, GNorm = 5.1866, lr_0 = 8.8571e-04
Loss = 5.1576e-01, PNorm = 44.1783, GNorm = 1.0526, lr_0 = 8.8510e-04
Loss = 4.9494e-01, PNorm = 44.2031, GNorm = 2.1041, lr_0 = 8.8450e-04
Loss = 5.4865e-01, PNorm = 44.2237, GNorm = 1.2554, lr_0 = 8.8389e-04
Loss = 4.2031e-01, PNorm = 44.2364, GNorm = 0.9711, lr_0 = 8.8329e-04
Loss = 5.5392e-01, PNorm = 44.2585, GNorm = 1.9678, lr_0 = 8.8268e-04
Loss = 5.0815e-01, PNorm = 44.2804, GNorm = 1.7591, lr_0 = 8.8208e-04
Loss = 5.1406e-01, PNorm = 44.3032, GNorm = 0.7109, lr_0 = 8.8147e-04
Loss = 6.0111e-01, PNorm = 44.3237, GNorm = 1.0247, lr_0 = 8.8087e-04
Loss = 5.0679e-01, PNorm = 44.3420, GNorm = 1.0488, lr_0 = 8.8026e-04
Loss = 5.5480e-01, PNorm = 44.3677, GNorm = 1.7891, lr_0 = 8.7966e-04
Loss = 5.4637e-01, PNorm = 44.3913, GNorm = 2.9260, lr_0 = 8.7906e-04
Loss = 4.5607e-01, PNorm = 44.4125, GNorm = 1.5429, lr_0 = 8.7846e-04
Loss = 5.0109e-01, PNorm = 44.4257, GNorm = 2.8546, lr_0 = 8.7785e-04
Loss = 4.8188e-01, PNorm = 44.4507, GNorm = 1.0470, lr_0 = 8.7725e-04
Loss = 5.4373e-01, PNorm = 44.4682, GNorm = 1.4598, lr_0 = 8.7665e-04
Loss = 5.5114e-01, PNorm = 44.4858, GNorm = 1.3342, lr_0 = 8.7605e-04
Loss = 4.8187e-01, PNorm = 44.5025, GNorm = 1.3280, lr_0 = 8.7545e-04
Loss = 4.6042e-01, PNorm = 44.5157, GNorm = 1.6267, lr_0 = 8.7485e-04
Loss = 5.6251e-01, PNorm = 44.5281, GNorm = 1.1537, lr_0 = 8.7425e-04
Loss = 5.6940e-01, PNorm = 44.5552, GNorm = 1.8560, lr_0 = 8.7365e-04
Loss = 4.8060e-01, PNorm = 44.5730, GNorm = 2.1386, lr_0 = 8.7306e-04
Loss = 5.3426e-01, PNorm = 44.5980, GNorm = 1.2765, lr_0 = 8.7246e-04
Loss = 4.8331e-01, PNorm = 44.6229, GNorm = 1.2684, lr_0 = 8.7186e-04
Loss = 5.3293e-01, PNorm = 44.6409, GNorm = 2.3044, lr_0 = 8.7126e-04
Loss = 4.6854e-01, PNorm = 44.6643, GNorm = 0.9636, lr_0 = 8.7067e-04
Loss = 4.3894e-01, PNorm = 44.6845, GNorm = 1.6234, lr_0 = 8.7007e-04
Loss = 4.3229e-01, PNorm = 44.7045, GNorm = 1.0215, lr_0 = 8.6947e-04
Loss = 4.8976e-01, PNorm = 44.7147, GNorm = 1.1068, lr_0 = 8.6888e-04
Loss = 4.1227e-01, PNorm = 44.7280, GNorm = 1.6221, lr_0 = 8.6828e-04
Loss = 4.6003e-01, PNorm = 44.7444, GNorm = 1.8399, lr_0 = 8.6769e-04
Loss = 4.6359e-01, PNorm = 44.7517, GNorm = 1.8434, lr_0 = 8.6709e-04
Loss = 5.0834e-01, PNorm = 44.7677, GNorm = 1.6603, lr_0 = 8.6650e-04
Loss = 5.0508e-01, PNorm = 44.7858, GNorm = 2.2038, lr_0 = 8.6590e-04
Loss = 4.9675e-01, PNorm = 44.8139, GNorm = 2.9849, lr_0 = 8.6531e-04
Loss = 4.7711e-01, PNorm = 44.8407, GNorm = 0.7136, lr_0 = 8.6472e-04
Loss = 5.0178e-01, PNorm = 44.8619, GNorm = 1.2740, lr_0 = 8.6413e-04
Loss = 4.8253e-01, PNorm = 44.8795, GNorm = 2.4134, lr_0 = 8.6353e-04
Loss = 5.0047e-01, PNorm = 44.8959, GNorm = 1.6096, lr_0 = 8.6294e-04
Loss = 4.4403e-01, PNorm = 44.9132, GNorm = 2.2245, lr_0 = 8.6235e-04
Loss = 4.3553e-01, PNorm = 44.9342, GNorm = 2.6478, lr_0 = 8.6176e-04
Loss = 4.8268e-01, PNorm = 44.9550, GNorm = 1.0220, lr_0 = 8.6117e-04
Loss = 5.1631e-01, PNorm = 44.9705, GNorm = 1.6494, lr_0 = 8.6058e-04
Loss = 5.3353e-01, PNorm = 44.9914, GNorm = 1.0743, lr_0 = 8.5999e-04
Loss = 5.5472e-01, PNorm = 45.0079, GNorm = 3.1631, lr_0 = 8.5940e-04
Loss = 4.6367e-01, PNorm = 45.0300, GNorm = 2.3418, lr_0 = 8.5881e-04
Loss = 5.2397e-01, PNorm = 45.0557, GNorm = 2.0946, lr_0 = 8.5823e-04
Loss = 4.9645e-01, PNorm = 45.0746, GNorm = 1.1530, lr_0 = 8.5764e-04
Loss = 4.5225e-01, PNorm = 45.0929, GNorm = 1.4785, lr_0 = 8.5705e-04
Loss = 4.7206e-01, PNorm = 45.1140, GNorm = 0.8256, lr_0 = 8.5646e-04
Loss = 4.9057e-01, PNorm = 45.1384, GNorm = 1.7024, lr_0 = 8.5588e-04
Loss = 5.1965e-01, PNorm = 45.1499, GNorm = 1.4976, lr_0 = 8.5529e-04
Loss = 5.2567e-01, PNorm = 45.1651, GNorm = 1.6916, lr_0 = 8.5470e-04
Loss = 5.4223e-01, PNorm = 45.1850, GNorm = 0.9688, lr_0 = 8.5412e-04
Loss = 5.3644e-01, PNorm = 45.2078, GNorm = 2.7586, lr_0 = 8.5353e-04
Loss = 4.9944e-01, PNorm = 45.2246, GNorm = 1.1695, lr_0 = 8.5295e-04
Loss = 5.0305e-01, PNorm = 45.2422, GNorm = 1.4575, lr_0 = 8.5236e-04
Loss = 4.6004e-01, PNorm = 45.2656, GNorm = 0.9495, lr_0 = 8.5178e-04
Loss = 5.4298e-01, PNorm = 45.2775, GNorm = 3.7200, lr_0 = 8.5120e-04
Loss = 4.6386e-01, PNorm = 45.3037, GNorm = 2.1758, lr_0 = 8.5061e-04
Loss = 4.8764e-01, PNorm = 45.3317, GNorm = 2.5194, lr_0 = 8.5003e-04
Loss = 5.2004e-01, PNorm = 45.3470, GNorm = 1.2498, lr_0 = 8.4945e-04
Loss = 4.4818e-01, PNorm = 45.3689, GNorm = 0.9187, lr_0 = 8.4887e-04
Loss = 5.1952e-01, PNorm = 45.3798, GNorm = 4.2816, lr_0 = 8.4828e-04
Validation mae = 0.123477
Epoch 4
Loss = 5.4766e-01, PNorm = 45.3915, GNorm = 2.9541, lr_0 = 8.4770e-04
Loss = 4.8067e-01, PNorm = 45.4181, GNorm = 3.6203, lr_0 = 8.4712e-04
Loss = 4.1543e-01, PNorm = 45.4387, GNorm = 1.0488, lr_0 = 8.4654e-04
Loss = 4.7952e-01, PNorm = 45.4566, GNorm = 1.2321, lr_0 = 8.4596e-04
Loss = 4.8443e-01, PNorm = 45.4722, GNorm = 2.0886, lr_0 = 8.4538e-04
Loss = 5.3976e-01, PNorm = 45.4860, GNorm = 1.0432, lr_0 = 8.4480e-04
Loss = 4.3702e-01, PNorm = 45.5072, GNorm = 2.4137, lr_0 = 8.4423e-04
Loss = 4.7744e-01, PNorm = 45.5245, GNorm = 1.2554, lr_0 = 8.4365e-04
Loss = 5.0662e-01, PNorm = 45.5351, GNorm = 1.3395, lr_0 = 8.4307e-04
Loss = 5.2198e-01, PNorm = 45.5494, GNorm = 1.4556, lr_0 = 8.4249e-04
Loss = 6.1743e-01, PNorm = 45.5759, GNorm = 3.3451, lr_0 = 8.4191e-04
Loss = 5.0099e-01, PNorm = 45.6006, GNorm = 1.3747, lr_0 = 8.4134e-04
Loss = 4.5153e-01, PNorm = 45.6165, GNorm = 1.4331, lr_0 = 8.4076e-04
Loss = 5.1514e-01, PNorm = 45.6403, GNorm = 1.3686, lr_0 = 8.4019e-04
Loss = 4.3039e-01, PNorm = 45.6625, GNorm = 1.0611, lr_0 = 8.3961e-04
Loss = 5.4236e-01, PNorm = 45.6778, GNorm = 1.3753, lr_0 = 8.3903e-04
Loss = 4.5101e-01, PNorm = 45.6924, GNorm = 1.5417, lr_0 = 8.3846e-04
Loss = 4.4537e-01, PNorm = 45.7041, GNorm = 1.5908, lr_0 = 8.3789e-04
Loss = 5.0933e-01, PNorm = 45.7251, GNorm = 1.5368, lr_0 = 8.3731e-04
Loss = 4.8424e-01, PNorm = 45.7363, GNorm = 1.1182, lr_0 = 8.3674e-04
Loss = 4.4436e-01, PNorm = 45.7536, GNorm = 0.9844, lr_0 = 8.3616e-04
Loss = 5.1067e-01, PNorm = 45.7734, GNorm = 2.0324, lr_0 = 8.3559e-04
Loss = 5.3189e-01, PNorm = 45.7909, GNorm = 1.5106, lr_0 = 8.3502e-04
Loss = 4.4491e-01, PNorm = 45.8206, GNorm = 1.1764, lr_0 = 8.3445e-04
Loss = 4.5484e-01, PNorm = 45.8389, GNorm = 0.8649, lr_0 = 8.3388e-04
Loss = 4.6888e-01, PNorm = 45.8617, GNorm = 1.2601, lr_0 = 8.3330e-04
Loss = 4.8142e-01, PNorm = 45.8811, GNorm = 2.0498, lr_0 = 8.3273e-04
Loss = 4.7542e-01, PNorm = 45.8991, GNorm = 1.3210, lr_0 = 8.3216e-04
Loss = 5.1834e-01, PNorm = 45.9189, GNorm = 3.2459, lr_0 = 8.3159e-04
Loss = 5.1976e-01, PNorm = 45.9407, GNorm = 2.3385, lr_0 = 8.3102e-04
Loss = 4.5330e-01, PNorm = 45.9634, GNorm = 1.3111, lr_0 = 8.3045e-04
Loss = 4.4396e-01, PNorm = 45.9786, GNorm = 1.3609, lr_0 = 8.2988e-04
Loss = 5.1328e-01, PNorm = 45.9908, GNorm = 1.3219, lr_0 = 8.2932e-04
Loss = 4.6622e-01, PNorm = 46.0097, GNorm = 1.1108, lr_0 = 8.2875e-04
Loss = 5.3958e-01, PNorm = 46.0248, GNorm = 2.9585, lr_0 = 8.2818e-04
Loss = 4.2040e-01, PNorm = 46.0376, GNorm = 1.1727, lr_0 = 8.2761e-04
Loss = 4.7705e-01, PNorm = 46.0596, GNorm = 1.3626, lr_0 = 8.2705e-04
Loss = 5.2021e-01, PNorm = 46.0815, GNorm = 1.8094, lr_0 = 8.2648e-04
Loss = 4.9514e-01, PNorm = 46.0948, GNorm = 2.8730, lr_0 = 8.2591e-04
Loss = 4.9550e-01, PNorm = 46.1115, GNorm = 1.3877, lr_0 = 8.2535e-04
Loss = 5.1336e-01, PNorm = 46.1269, GNorm = 1.4042, lr_0 = 8.2478e-04
Loss = 4.6790e-01, PNorm = 46.1465, GNorm = 1.5306, lr_0 = 8.2422e-04
Loss = 4.9335e-01, PNorm = 46.1625, GNorm = 0.9171, lr_0 = 8.2365e-04
Loss = 4.5775e-01, PNorm = 46.1791, GNorm = 1.4753, lr_0 = 8.2309e-04
Loss = 5.2251e-01, PNorm = 46.2036, GNorm = 2.5020, lr_0 = 8.2252e-04
Loss = 5.4255e-01, PNorm = 46.2223, GNorm = 1.5670, lr_0 = 8.2196e-04
Loss = 4.4400e-01, PNorm = 46.2419, GNorm = 2.3080, lr_0 = 8.2140e-04
Loss = 4.8489e-01, PNorm = 46.2629, GNorm = 1.0417, lr_0 = 8.2084e-04
Loss = 4.6946e-01, PNorm = 46.2867, GNorm = 1.2683, lr_0 = 8.2027e-04
Loss = 3.8392e-01, PNorm = 46.3066, GNorm = 0.8074, lr_0 = 8.1971e-04
Loss = 4.5633e-01, PNorm = 46.3219, GNorm = 1.6439, lr_0 = 8.1915e-04
Loss = 4.3390e-01, PNorm = 46.3308, GNorm = 1.5964, lr_0 = 8.1859e-04
Loss = 4.5803e-01, PNorm = 46.3381, GNorm = 1.8634, lr_0 = 8.1803e-04
Loss = 4.8423e-01, PNorm = 46.3555, GNorm = 2.1042, lr_0 = 8.1747e-04
Loss = 5.1498e-01, PNorm = 46.3730, GNorm = 1.6008, lr_0 = 8.1691e-04
Loss = 5.6674e-01, PNorm = 46.3909, GNorm = 2.7218, lr_0 = 8.1635e-04
Loss = 4.5268e-01, PNorm = 46.4149, GNorm = 1.3502, lr_0 = 8.1579e-04
Loss = 4.7140e-01, PNorm = 46.4352, GNorm = 0.9737, lr_0 = 8.1523e-04
Loss = 4.8318e-01, PNorm = 46.4538, GNorm = 1.4568, lr_0 = 8.1467e-04
Loss = 4.5978e-01, PNorm = 46.4649, GNorm = 2.0519, lr_0 = 8.1411e-04
Loss = 4.6156e-01, PNorm = 46.4863, GNorm = 1.5273, lr_0 = 8.1355e-04
Loss = 5.9860e-01, PNorm = 46.5050, GNorm = 2.8461, lr_0 = 8.1300e-04
Loss = 5.3276e-01, PNorm = 46.5294, GNorm = 1.0983, lr_0 = 8.1244e-04
Loss = 4.8862e-01, PNorm = 46.5515, GNorm = 1.3371, lr_0 = 8.1188e-04
Loss = 4.7499e-01, PNorm = 46.5652, GNorm = 1.8747, lr_0 = 8.1133e-04
Loss = 5.0995e-01, PNorm = 46.5778, GNorm = 1.3414, lr_0 = 8.1077e-04
Loss = 5.2278e-01, PNorm = 46.6093, GNorm = 2.9645, lr_0 = 8.1022e-04
Loss = 4.9338e-01, PNorm = 46.6300, GNorm = 1.7640, lr_0 = 8.0966e-04
Loss = 5.2513e-01, PNorm = 46.6567, GNorm = 1.4525, lr_0 = 8.0911e-04
Loss = 4.7405e-01, PNorm = 46.6688, GNorm = 2.7659, lr_0 = 8.0855e-04
Loss = 4.3847e-01, PNorm = 46.6909, GNorm = 2.2052, lr_0 = 8.0800e-04
Loss = 4.5132e-01, PNorm = 46.7025, GNorm = 2.6701, lr_0 = 8.0745e-04
Loss = 4.9582e-01, PNorm = 46.7241, GNorm = 1.0116, lr_0 = 8.0689e-04
Loss = 5.6647e-01, PNorm = 46.7590, GNorm = 3.2799, lr_0 = 8.0634e-04
Loss = 4.3681e-01, PNorm = 46.7862, GNorm = 1.1323, lr_0 = 8.0579e-04
Loss = 4.5762e-01, PNorm = 46.7993, GNorm = 0.8872, lr_0 = 8.0523e-04
Loss = 4.9963e-01, PNorm = 46.8066, GNorm = 2.2874, lr_0 = 8.0468e-04
Loss = 6.2324e-01, PNorm = 46.8053, GNorm = 2.0252, lr_0 = 8.0413e-04
Loss = 4.8587e-01, PNorm = 46.8308, GNorm = 1.3510, lr_0 = 8.0358e-04
Loss = 4.8263e-01, PNorm = 46.8547, GNorm = 1.4072, lr_0 = 8.0303e-04
Loss = 4.7635e-01, PNorm = 46.8804, GNorm = 1.7778, lr_0 = 8.0248e-04
Loss = 5.3820e-01, PNorm = 46.8941, GNorm = 3.3810, lr_0 = 8.0193e-04
Loss = 4.7677e-01, PNorm = 46.9165, GNorm = 1.1529, lr_0 = 8.0138e-04
Loss = 5.0799e-01, PNorm = 46.9360, GNorm = 1.2198, lr_0 = 8.0083e-04
Loss = 4.3027e-01, PNorm = 46.9516, GNorm = 3.3705, lr_0 = 8.0028e-04
Loss = 5.1574e-01, PNorm = 46.9648, GNorm = 2.8550, lr_0 = 7.9974e-04
Loss = 5.0033e-01, PNorm = 46.9920, GNorm = 1.4014, lr_0 = 7.9919e-04
Loss = 4.8634e-01, PNorm = 47.0124, GNorm = 1.2294, lr_0 = 7.9864e-04
Loss = 4.0257e-01, PNorm = 47.0351, GNorm = 0.8834, lr_0 = 7.9809e-04
Loss = 4.8710e-01, PNorm = 47.0567, GNorm = 1.6020, lr_0 = 7.9755e-04
Loss = 4.6240e-01, PNorm = 47.0773, GNorm = 1.1839, lr_0 = 7.9700e-04
Loss = 4.9485e-01, PNorm = 47.0921, GNorm = 1.5333, lr_0 = 7.9645e-04
Loss = 5.0322e-01, PNorm = 47.1090, GNorm = 3.1072, lr_0 = 7.9591e-04
Loss = 4.6479e-01, PNorm = 47.1258, GNorm = 1.1110, lr_0 = 7.9536e-04
Loss = 4.7665e-01, PNorm = 47.1422, GNorm = 1.3605, lr_0 = 7.9482e-04
Loss = 4.4060e-01, PNorm = 47.1621, GNorm = 2.1612, lr_0 = 7.9427e-04
Loss = 4.6185e-01, PNorm = 47.1791, GNorm = 1.2993, lr_0 = 7.9373e-04
Loss = 4.7354e-01, PNorm = 47.1922, GNorm = 1.7021, lr_0 = 7.9319e-04
Loss = 6.0584e-01, PNorm = 47.2110, GNorm = 1.5368, lr_0 = 7.9264e-04
Loss = 4.7892e-01, PNorm = 47.2316, GNorm = 2.0550, lr_0 = 7.9210e-04
Loss = 4.8734e-01, PNorm = 47.2512, GNorm = 1.4173, lr_0 = 7.9156e-04
Loss = 4.5781e-01, PNorm = 47.2792, GNorm = 1.3763, lr_0 = 7.9101e-04
Loss = 6.0703e-01, PNorm = 47.2958, GNorm = 1.1987, lr_0 = 7.9047e-04
Loss = 4.8714e-01, PNorm = 47.3158, GNorm = 1.8374, lr_0 = 7.8993e-04
Loss = 5.3560e-01, PNorm = 47.3322, GNorm = 1.6236, lr_0 = 7.8939e-04
Loss = 4.7457e-01, PNorm = 47.3560, GNorm = 1.3580, lr_0 = 7.8885e-04
Loss = 4.9865e-01, PNorm = 47.3720, GNorm = 1.2750, lr_0 = 7.8831e-04
Loss = 5.1428e-01, PNorm = 47.3840, GNorm = 2.7875, lr_0 = 7.8777e-04
Loss = 5.2477e-01, PNorm = 47.3986, GNorm = 2.1213, lr_0 = 7.8723e-04
Loss = 4.1234e-01, PNorm = 47.4237, GNorm = 1.8057, lr_0 = 7.8669e-04
Loss = 5.1042e-01, PNorm = 47.4403, GNorm = 2.7130, lr_0 = 7.8615e-04
Loss = 5.4322e-01, PNorm = 47.4579, GNorm = 2.0466, lr_0 = 7.8561e-04
Loss = 4.6215e-01, PNorm = 47.4750, GNorm = 1.3972, lr_0 = 7.8507e-04
Loss = 4.6866e-01, PNorm = 47.4975, GNorm = 1.6615, lr_0 = 7.8454e-04
Loss = 4.7748e-01, PNorm = 47.5105, GNorm = 2.4435, lr_0 = 7.8400e-04
Loss = 5.4566e-01, PNorm = 47.5312, GNorm = 0.9907, lr_0 = 7.8346e-04
Loss = 4.9092e-01, PNorm = 47.5499, GNorm = 1.6976, lr_0 = 7.8293e-04
Loss = 4.3943e-01, PNorm = 47.5624, GNorm = 0.9524, lr_0 = 7.8239e-04
Loss = 4.6771e-01, PNorm = 47.5772, GNorm = 1.2019, lr_0 = 7.8185e-04
Loss = 4.6609e-01, PNorm = 47.5985, GNorm = 1.8369, lr_0 = 7.8132e-04
Validation mae = 0.120838
Epoch 5
Loss = 4.5674e-01, PNorm = 47.6116, GNorm = 1.2003, lr_0 = 7.8078e-04
Loss = 4.8023e-01, PNorm = 47.6337, GNorm = 1.4255, lr_0 = 7.8025e-04
Loss = 5.4201e-01, PNorm = 47.6525, GNorm = 1.3891, lr_0 = 7.7971e-04
Loss = 4.5966e-01, PNorm = 47.6699, GNorm = 1.1476, lr_0 = 7.7918e-04
Loss = 4.7409e-01, PNorm = 47.6873, GNorm = 1.3270, lr_0 = 7.7864e-04
Loss = 4.9152e-01, PNorm = 47.7045, GNorm = 1.4981, lr_0 = 7.7811e-04
Loss = 4.2979e-01, PNorm = 47.7177, GNorm = 2.0821, lr_0 = 7.7758e-04
Loss = 4.8293e-01, PNorm = 47.7359, GNorm = 1.1559, lr_0 = 7.7705e-04
Loss = 4.5761e-01, PNorm = 47.7620, GNorm = 1.1888, lr_0 = 7.7651e-04
Loss = 4.6400e-01, PNorm = 47.7851, GNorm = 1.1390, lr_0 = 7.7598e-04
Loss = 4.3124e-01, PNorm = 47.8130, GNorm = 1.0683, lr_0 = 7.7545e-04
Loss = 4.1705e-01, PNorm = 47.8316, GNorm = 1.2444, lr_0 = 7.7492e-04
Loss = 5.0408e-01, PNorm = 47.8452, GNorm = 1.4061, lr_0 = 7.7439e-04
Loss = 4.7838e-01, PNorm = 47.8713, GNorm = 1.3619, lr_0 = 7.7386e-04
Loss = 5.3628e-01, PNorm = 47.8955, GNorm = 1.4075, lr_0 = 7.7333e-04
Loss = 4.6429e-01, PNorm = 47.9078, GNorm = 1.9389, lr_0 = 7.7280e-04
Loss = 4.1649e-01, PNorm = 47.9263, GNorm = 1.5956, lr_0 = 7.7227e-04
Loss = 4.7300e-01, PNorm = 47.9418, GNorm = 1.3247, lr_0 = 7.7174e-04
Loss = 4.4299e-01, PNorm = 47.9613, GNorm = 1.1231, lr_0 = 7.7121e-04
Loss = 4.3901e-01, PNorm = 47.9839, GNorm = 1.8817, lr_0 = 7.7068e-04
Loss = 4.8736e-01, PNorm = 48.0032, GNorm = 1.3602, lr_0 = 7.7015e-04
Loss = 4.7553e-01, PNorm = 48.0201, GNorm = 1.4942, lr_0 = 7.6963e-04
Loss = 5.3430e-01, PNorm = 48.0418, GNorm = 1.5331, lr_0 = 7.6910e-04
Loss = 4.2925e-01, PNorm = 48.0624, GNorm = 2.3961, lr_0 = 7.6857e-04
Loss = 4.9865e-01, PNorm = 48.0877, GNorm = 1.6774, lr_0 = 7.6805e-04
Loss = 4.8017e-01, PNorm = 48.1056, GNorm = 1.0800, lr_0 = 7.6752e-04
Loss = 4.1232e-01, PNorm = 48.1339, GNorm = 0.9326, lr_0 = 7.6699e-04
Loss = 4.7303e-01, PNorm = 48.1609, GNorm = 2.3226, lr_0 = 7.6647e-04
Loss = 4.6310e-01, PNorm = 48.1847, GNorm = 1.3047, lr_0 = 7.6594e-04
Loss = 5.0023e-01, PNorm = 48.2076, GNorm = 1.6281, lr_0 = 7.6542e-04
Loss = 4.2923e-01, PNorm = 48.2402, GNorm = 1.9302, lr_0 = 7.6489e-04
Loss = 4.8475e-01, PNorm = 48.2684, GNorm = 1.5978, lr_0 = 7.6437e-04
Loss = 5.4838e-01, PNorm = 48.2890, GNorm = 1.4702, lr_0 = 7.6385e-04
Loss = 4.3159e-01, PNorm = 48.3094, GNorm = 0.9933, lr_0 = 7.6332e-04
Loss = 4.2377e-01, PNorm = 48.3253, GNorm = 1.1134, lr_0 = 7.6280e-04
Loss = 4.3705e-01, PNorm = 48.3448, GNorm = 1.5693, lr_0 = 7.6228e-04
Loss = 5.0534e-01, PNorm = 48.3537, GNorm = 1.8156, lr_0 = 7.6176e-04
Loss = 4.0571e-01, PNorm = 48.3649, GNorm = 1.4582, lr_0 = 7.6123e-04
Loss = 4.9567e-01, PNorm = 48.3868, GNorm = 1.2977, lr_0 = 7.6071e-04
Loss = 4.7801e-01, PNorm = 48.4104, GNorm = 1.8691, lr_0 = 7.6019e-04
Loss = 4.5443e-01, PNorm = 48.4320, GNorm = 1.5564, lr_0 = 7.5967e-04
Loss = 4.7821e-01, PNorm = 48.4514, GNorm = 0.9815, lr_0 = 7.5915e-04
Loss = 4.7463e-01, PNorm = 48.4644, GNorm = 2.8780, lr_0 = 7.5863e-04
Loss = 4.9590e-01, PNorm = 48.4854, GNorm = 1.2496, lr_0 = 7.5811e-04
Loss = 4.8039e-01, PNorm = 48.5028, GNorm = 3.3306, lr_0 = 7.5759e-04
Loss = 5.2017e-01, PNorm = 48.5100, GNorm = 1.2558, lr_0 = 7.5707e-04
Loss = 5.0667e-01, PNorm = 48.5340, GNorm = 1.4748, lr_0 = 7.5655e-04
Loss = 4.4623e-01, PNorm = 48.5507, GNorm = 1.3715, lr_0 = 7.5603e-04
Loss = 4.9305e-01, PNorm = 48.5658, GNorm = 1.9885, lr_0 = 7.5552e-04
Loss = 4.9430e-01, PNorm = 48.5805, GNorm = 1.2416, lr_0 = 7.5500e-04
Loss = 4.7065e-01, PNorm = 48.6031, GNorm = 1.5630, lr_0 = 7.5448e-04
Loss = 4.9984e-01, PNorm = 48.6216, GNorm = 1.8216, lr_0 = 7.5397e-04
Loss = 4.4133e-01, PNorm = 48.6304, GNorm = 0.9187, lr_0 = 7.5345e-04
Loss = 4.9633e-01, PNorm = 48.6436, GNorm = 1.2457, lr_0 = 7.5293e-04
Loss = 4.5883e-01, PNorm = 48.6603, GNorm = 1.1389, lr_0 = 7.5242e-04
Loss = 4.7043e-01, PNorm = 48.6747, GNorm = 1.2838, lr_0 = 7.5190e-04
Loss = 5.4407e-01, PNorm = 48.6812, GNorm = 2.6554, lr_0 = 7.5139e-04
Loss = 4.8394e-01, PNorm = 48.6894, GNorm = 3.2667, lr_0 = 7.5087e-04
Loss = 4.6294e-01, PNorm = 48.7042, GNorm = 0.8052, lr_0 = 7.5036e-04
Loss = 4.6443e-01, PNorm = 48.7215, GNorm = 1.3000, lr_0 = 7.4984e-04
Loss = 5.0233e-01, PNorm = 48.7284, GNorm = 2.9159, lr_0 = 7.4933e-04
Loss = 4.2943e-01, PNorm = 48.7448, GNorm = 1.0538, lr_0 = 7.4882e-04
Loss = 4.7275e-01, PNorm = 48.7581, GNorm = 1.0634, lr_0 = 7.4830e-04
Loss = 4.4358e-01, PNorm = 48.7705, GNorm = 1.7684, lr_0 = 7.4779e-04
Loss = 4.9735e-01, PNorm = 48.7858, GNorm = 0.9406, lr_0 = 7.4728e-04
Loss = 4.4183e-01, PNorm = 48.8063, GNorm = 1.4830, lr_0 = 7.4677e-04
Loss = 4.5812e-01, PNorm = 48.8166, GNorm = 1.6079, lr_0 = 7.4625e-04
Loss = 5.2058e-01, PNorm = 48.8249, GNorm = 2.1233, lr_0 = 7.4574e-04
Loss = 4.4439e-01, PNorm = 48.8401, GNorm = 1.3462, lr_0 = 7.4523e-04
Loss = 4.5938e-01, PNorm = 48.8554, GNorm = 1.2294, lr_0 = 7.4472e-04
Loss = 5.1604e-01, PNorm = 48.8665, GNorm = 1.3176, lr_0 = 7.4421e-04
Loss = 4.2219e-01, PNorm = 48.8804, GNorm = 1.8046, lr_0 = 7.4370e-04
Loss = 4.6690e-01, PNorm = 48.8943, GNorm = 1.5141, lr_0 = 7.4319e-04
Loss = 4.8872e-01, PNorm = 48.9228, GNorm = 0.9423, lr_0 = 7.4268e-04
Loss = 5.0156e-01, PNorm = 48.9484, GNorm = 1.7254, lr_0 = 7.4217e-04
Loss = 5.0827e-01, PNorm = 48.9718, GNorm = 2.0506, lr_0 = 7.4167e-04
Loss = 4.8728e-01, PNorm = 48.9928, GNorm = 1.4110, lr_0 = 7.4116e-04
Loss = 4.8556e-01, PNorm = 49.0065, GNorm = 1.4271, lr_0 = 7.4065e-04
Loss = 4.1748e-01, PNorm = 49.0203, GNorm = 1.1409, lr_0 = 7.4014e-04
Loss = 4.7508e-01, PNorm = 49.0445, GNorm = 1.2049, lr_0 = 7.3964e-04
Loss = 4.7674e-01, PNorm = 49.0637, GNorm = 2.4232, lr_0 = 7.3913e-04
Loss = 5.1723e-01, PNorm = 49.0756, GNorm = 1.1636, lr_0 = 7.3862e-04
Loss = 4.8784e-01, PNorm = 49.0813, GNorm = 1.2630, lr_0 = 7.3812e-04
Loss = 5.1048e-01, PNorm = 49.0997, GNorm = 2.1219, lr_0 = 7.3761e-04
Loss = 4.8775e-01, PNorm = 49.1114, GNorm = 1.3123, lr_0 = 7.3711e-04
Loss = 4.3105e-01, PNorm = 49.1352, GNorm = 1.3564, lr_0 = 7.3660e-04
Loss = 4.3572e-01, PNorm = 49.1544, GNorm = 1.2566, lr_0 = 7.3610e-04
Loss = 4.4000e-01, PNorm = 49.1750, GNorm = 1.2651, lr_0 = 7.3559e-04
Loss = 5.1673e-01, PNorm = 49.1924, GNorm = 1.5656, lr_0 = 7.3509e-04
Loss = 4.5336e-01, PNorm = 49.2077, GNorm = 1.4464, lr_0 = 7.3458e-04
Loss = 4.0460e-01, PNorm = 49.2217, GNorm = 1.4144, lr_0 = 7.3408e-04
Loss = 4.0908e-01, PNorm = 49.2337, GNorm = 1.3338, lr_0 = 7.3358e-04
Loss = 5.1775e-01, PNorm = 49.2457, GNorm = 1.8851, lr_0 = 7.3308e-04
Loss = 4.8465e-01, PNorm = 49.2691, GNorm = 0.9411, lr_0 = 7.3257e-04
Loss = 5.0704e-01, PNorm = 49.2899, GNorm = 1.6732, lr_0 = 7.3207e-04
Loss = 4.7830e-01, PNorm = 49.3117, GNorm = 1.4594, lr_0 = 7.3157e-04
Loss = 4.5100e-01, PNorm = 49.3277, GNorm = 1.1412, lr_0 = 7.3107e-04
Loss = 4.4884e-01, PNorm = 49.3412, GNorm = 0.9510, lr_0 = 7.3057e-04
Loss = 5.2496e-01, PNorm = 49.3540, GNorm = 1.7113, lr_0 = 7.3007e-04
Loss = 4.7303e-01, PNorm = 49.3717, GNorm = 0.9109, lr_0 = 7.2957e-04
Loss = 4.6699e-01, PNorm = 49.3910, GNorm = 3.3323, lr_0 = 7.2907e-04
Loss = 4.2269e-01, PNorm = 49.4075, GNorm = 1.4964, lr_0 = 7.2857e-04
Loss = 3.9116e-01, PNorm = 49.4258, GNorm = 1.3973, lr_0 = 7.2807e-04
Loss = 4.3721e-01, PNorm = 49.4369, GNorm = 0.8130, lr_0 = 7.2757e-04
Loss = 4.2857e-01, PNorm = 49.4461, GNorm = 0.8230, lr_0 = 7.2707e-04
Loss = 4.5561e-01, PNorm = 49.4553, GNorm = 1.1195, lr_0 = 7.2657e-04
Loss = 4.6082e-01, PNorm = 49.4663, GNorm = 1.8381, lr_0 = 7.2608e-04
Loss = 4.9337e-01, PNorm = 49.4799, GNorm = 1.4419, lr_0 = 7.2558e-04
Loss = 4.6494e-01, PNorm = 49.4831, GNorm = 1.4602, lr_0 = 7.2508e-04
Loss = 4.8487e-01, PNorm = 49.4995, GNorm = 1.4179, lr_0 = 7.2458e-04
Loss = 4.3828e-01, PNorm = 49.5090, GNorm = 1.0015, lr_0 = 7.2409e-04
Loss = 4.6483e-01, PNorm = 49.5285, GNorm = 0.8026, lr_0 = 7.2359e-04
Loss = 4.4807e-01, PNorm = 49.5484, GNorm = 1.5636, lr_0 = 7.2310e-04
Loss = 4.6132e-01, PNorm = 49.5599, GNorm = 1.1649, lr_0 = 7.2260e-04
Loss = 5.2214e-01, PNorm = 49.5762, GNorm = 1.5293, lr_0 = 7.2211e-04
Loss = 4.1716e-01, PNorm = 49.5969, GNorm = 1.1868, lr_0 = 7.2161e-04
Loss = 4.3145e-01, PNorm = 49.6122, GNorm = 1.3823, lr_0 = 7.2112e-04
Loss = 4.6854e-01, PNorm = 49.6298, GNorm = 1.4471, lr_0 = 7.2062e-04
Loss = 5.0036e-01, PNorm = 49.6410, GNorm = 1.2828, lr_0 = 7.2013e-04
Loss = 4.3573e-01, PNorm = 49.6520, GNorm = 1.0689, lr_0 = 7.1964e-04
Validation mae = 0.120019
Epoch 6
Loss = 4.3113e-01, PNorm = 49.6629, GNorm = 1.4319, lr_0 = 7.1914e-04
Loss = 4.7574e-01, PNorm = 49.6753, GNorm = 2.6594, lr_0 = 7.1865e-04
Loss = 5.1299e-01, PNorm = 49.6881, GNorm = 1.1887, lr_0 = 7.1816e-04
Loss = 4.4067e-01, PNorm = 49.7013, GNorm = 1.1678, lr_0 = 7.1767e-04
Loss = 4.6786e-01, PNorm = 49.7216, GNorm = 3.4914, lr_0 = 7.1717e-04
Loss = 4.9629e-01, PNorm = 49.7396, GNorm = 1.2608, lr_0 = 7.1668e-04
Loss = 4.7941e-01, PNorm = 49.7496, GNorm = 1.4077, lr_0 = 7.1619e-04
Loss = 4.8977e-01, PNorm = 49.7649, GNorm = 1.4479, lr_0 = 7.1570e-04
Loss = 4.5695e-01, PNorm = 49.7867, GNorm = 1.5935, lr_0 = 7.1521e-04
Loss = 5.5883e-01, PNorm = 49.8038, GNorm = 1.7131, lr_0 = 7.1472e-04
Loss = 4.5098e-01, PNorm = 49.8348, GNorm = 2.8860, lr_0 = 7.1423e-04
Loss = 5.0636e-01, PNorm = 49.8489, GNorm = 1.1240, lr_0 = 7.1374e-04
Loss = 4.4844e-01, PNorm = 49.8688, GNorm = 0.9111, lr_0 = 7.1325e-04
Loss = 4.5069e-01, PNorm = 49.8938, GNorm = 0.9729, lr_0 = 7.1277e-04
Loss = 4.3922e-01, PNorm = 49.9105, GNorm = 1.1963, lr_0 = 7.1228e-04
Loss = 4.5974e-01, PNorm = 49.9166, GNorm = 1.7287, lr_0 = 7.1179e-04
Loss = 4.5252e-01, PNorm = 49.9276, GNorm = 1.0187, lr_0 = 7.1130e-04
Loss = 4.4969e-01, PNorm = 49.9460, GNorm = 1.9719, lr_0 = 7.1081e-04
Loss = 5.1171e-01, PNorm = 49.9609, GNorm = 2.0772, lr_0 = 7.1033e-04
Loss = 4.6389e-01, PNorm = 49.9767, GNorm = 1.4177, lr_0 = 7.0984e-04
Loss = 4.2778e-01, PNorm = 49.9892, GNorm = 0.9327, lr_0 = 7.0935e-04
Loss = 4.0343e-01, PNorm = 50.0035, GNorm = 1.1405, lr_0 = 7.0887e-04
Loss = 3.9864e-01, PNorm = 50.0182, GNorm = 1.1183, lr_0 = 7.0838e-04
Loss = 4.5424e-01, PNorm = 50.0333, GNorm = 1.2298, lr_0 = 7.0790e-04
Loss = 4.9770e-01, PNorm = 50.0503, GNorm = 1.2878, lr_0 = 7.0741e-04
Loss = 4.6524e-01, PNorm = 50.0685, GNorm = 1.6545, lr_0 = 7.0693e-04
Loss = 4.4575e-01, PNorm = 50.0824, GNorm = 1.2744, lr_0 = 7.0644e-04
Loss = 4.5748e-01, PNorm = 50.0996, GNorm = 1.3841, lr_0 = 7.0596e-04
Loss = 4.4097e-01, PNorm = 50.1142, GNorm = 1.2018, lr_0 = 7.0548e-04
Loss = 4.7964e-01, PNorm = 50.1324, GNorm = 3.9663, lr_0 = 7.0499e-04
Loss = 4.7798e-01, PNorm = 50.1483, GNorm = 1.9695, lr_0 = 7.0451e-04
Loss = 5.5126e-01, PNorm = 50.1705, GNorm = 0.9622, lr_0 = 7.0403e-04
Loss = 4.8045e-01, PNorm = 50.1945, GNorm = 1.5677, lr_0 = 7.0354e-04
Loss = 4.3989e-01, PNorm = 50.2083, GNorm = 1.4159, lr_0 = 7.0306e-04
Loss = 4.4138e-01, PNorm = 50.2253, GNorm = 1.6111, lr_0 = 7.0258e-04
Loss = 4.7053e-01, PNorm = 50.2458, GNorm = 2.1148, lr_0 = 7.0210e-04
Loss = 4.0424e-01, PNorm = 50.2614, GNorm = 1.0934, lr_0 = 7.0162e-04
Loss = 4.8959e-01, PNorm = 50.2851, GNorm = 1.8155, lr_0 = 7.0114e-04
Loss = 4.6916e-01, PNorm = 50.2969, GNorm = 1.0724, lr_0 = 7.0066e-04
Loss = 4.3045e-01, PNorm = 50.3207, GNorm = 1.6389, lr_0 = 7.0018e-04
Loss = 4.4870e-01, PNorm = 50.3331, GNorm = 2.3060, lr_0 = 6.9970e-04
Loss = 4.4507e-01, PNorm = 50.3470, GNorm = 1.0280, lr_0 = 6.9922e-04
Loss = 4.8015e-01, PNorm = 50.3618, GNorm = 0.9802, lr_0 = 6.9874e-04
Loss = 4.9857e-01, PNorm = 50.3807, GNorm = 2.0353, lr_0 = 6.9826e-04
Loss = 4.4951e-01, PNorm = 50.4005, GNorm = 1.0226, lr_0 = 6.9778e-04
Loss = 4.9927e-01, PNorm = 50.4140, GNorm = 1.3376, lr_0 = 6.9730e-04
Loss = 4.6850e-01, PNorm = 50.4339, GNorm = 2.3488, lr_0 = 6.9683e-04
Loss = 4.8996e-01, PNorm = 50.4579, GNorm = 1.3299, lr_0 = 6.9635e-04
Loss = 4.0342e-01, PNorm = 50.4750, GNorm = 1.3983, lr_0 = 6.9587e-04
Loss = 4.4010e-01, PNorm = 50.4900, GNorm = 1.1440, lr_0 = 6.9540e-04
Loss = 4.3418e-01, PNorm = 50.5048, GNorm = 1.8696, lr_0 = 6.9492e-04
Loss = 5.0200e-01, PNorm = 50.5196, GNorm = 1.3406, lr_0 = 6.9444e-04
Loss = 5.2468e-01, PNorm = 50.5325, GNorm = 0.7866, lr_0 = 6.9397e-04
Loss = 4.9582e-01, PNorm = 50.5540, GNorm = 1.6786, lr_0 = 6.9349e-04
Loss = 5.2348e-01, PNorm = 50.5742, GNorm = 1.5174, lr_0 = 6.9302e-04
Loss = 5.0470e-01, PNorm = 50.5988, GNorm = 2.4024, lr_0 = 6.9254e-04
Loss = 4.8462e-01, PNorm = 50.6192, GNorm = 1.6827, lr_0 = 6.9207e-04
Loss = 3.8471e-01, PNorm = 50.6296, GNorm = 1.5383, lr_0 = 6.9159e-04
Loss = 4.7462e-01, PNorm = 50.6366, GNorm = 1.0811, lr_0 = 6.9112e-04
Loss = 4.1296e-01, PNorm = 50.6458, GNorm = 1.5594, lr_0 = 6.9065e-04
Loss = 4.0423e-01, PNorm = 50.6593, GNorm = 1.7236, lr_0 = 6.9017e-04
Loss = 3.9816e-01, PNorm = 50.6713, GNorm = 0.8671, lr_0 = 6.8970e-04
Loss = 4.2162e-01, PNorm = 50.6894, GNorm = 1.5708, lr_0 = 6.8923e-04
Loss = 4.4003e-01, PNorm = 50.7035, GNorm = 1.3232, lr_0 = 6.8876e-04
Loss = 5.3439e-01, PNorm = 50.7194, GNorm = 1.1606, lr_0 = 6.8828e-04
Loss = 4.7973e-01, PNorm = 50.7366, GNorm = 2.4405, lr_0 = 6.8781e-04
Loss = 4.3995e-01, PNorm = 50.7618, GNorm = 1.6948, lr_0 = 6.8734e-04
Loss = 4.5205e-01, PNorm = 50.7772, GNorm = 1.1214, lr_0 = 6.8687e-04
Loss = 4.4093e-01, PNorm = 50.7850, GNorm = 0.9653, lr_0 = 6.8640e-04
Loss = 4.7930e-01, PNorm = 50.7964, GNorm = 1.2344, lr_0 = 6.8593e-04
Loss = 4.6133e-01, PNorm = 50.8157, GNorm = 1.0405, lr_0 = 6.8546e-04
Loss = 4.4808e-01, PNorm = 50.8381, GNorm = 1.3532, lr_0 = 6.8499e-04
Loss = 4.3997e-01, PNorm = 50.8601, GNorm = 1.8632, lr_0 = 6.8452e-04
Loss = 4.4076e-01, PNorm = 50.8756, GNorm = 1.4765, lr_0 = 6.8405e-04
Loss = 5.0043e-01, PNorm = 50.8943, GNorm = 1.8804, lr_0 = 6.8358e-04
Loss = 3.9098e-01, PNorm = 50.9133, GNorm = 1.4326, lr_0 = 6.8312e-04
Loss = 4.6607e-01, PNorm = 50.9310, GNorm = 1.5100, lr_0 = 6.8265e-04
Loss = 4.3248e-01, PNorm = 50.9362, GNorm = 1.0478, lr_0 = 6.8218e-04
Loss = 4.7349e-01, PNorm = 50.9518, GNorm = 1.7074, lr_0 = 6.8171e-04
Loss = 4.6860e-01, PNorm = 50.9740, GNorm = 1.2697, lr_0 = 6.8125e-04
Loss = 4.7269e-01, PNorm = 50.9936, GNorm = 1.9359, lr_0 = 6.8078e-04
Loss = 5.1789e-01, PNorm = 51.0001, GNorm = 1.1612, lr_0 = 6.8031e-04
Loss = 4.6336e-01, PNorm = 51.0294, GNorm = 1.9912, lr_0 = 6.7985e-04
Loss = 4.8883e-01, PNorm = 51.0462, GNorm = 1.8428, lr_0 = 6.7938e-04
Loss = 4.2542e-01, PNorm = 51.0635, GNorm = 1.7134, lr_0 = 6.7892e-04
Loss = 3.7663e-01, PNorm = 51.0781, GNorm = 0.8857, lr_0 = 6.7845e-04
Loss = 4.5289e-01, PNorm = 51.0935, GNorm = 1.2168, lr_0 = 6.7799e-04
Loss = 4.7483e-01, PNorm = 51.1030, GNorm = 1.9790, lr_0 = 6.7752e-04
Loss = 4.1968e-01, PNorm = 51.1100, GNorm = 2.0833, lr_0 = 6.7706e-04
Loss = 4.0552e-01, PNorm = 51.1206, GNorm = 1.5675, lr_0 = 6.7659e-04
Loss = 3.9359e-01, PNorm = 51.1321, GNorm = 1.5301, lr_0 = 6.7613e-04
Loss = 4.6120e-01, PNorm = 51.1476, GNorm = 1.2181, lr_0 = 6.7567e-04
Loss = 4.7032e-01, PNorm = 51.1524, GNorm = 1.1958, lr_0 = 6.7520e-04
Loss = 4.3265e-01, PNorm = 51.1597, GNorm = 2.4704, lr_0 = 6.7474e-04
Loss = 4.5792e-01, PNorm = 51.1767, GNorm = 1.2022, lr_0 = 6.7428e-04
Loss = 4.6352e-01, PNorm = 51.1913, GNorm = 1.9740, lr_0 = 6.7382e-04
Loss = 4.2969e-01, PNorm = 51.1971, GNorm = 0.9791, lr_0 = 6.7335e-04
Loss = 5.2440e-01, PNorm = 51.2105, GNorm = 1.3381, lr_0 = 6.7289e-04
Loss = 4.6784e-01, PNorm = 51.2262, GNorm = 1.3644, lr_0 = 6.7243e-04
Loss = 4.5882e-01, PNorm = 51.2442, GNorm = 1.2956, lr_0 = 6.7197e-04
Loss = 4.3128e-01, PNorm = 51.2613, GNorm = 1.5982, lr_0 = 6.7151e-04
Loss = 4.2398e-01, PNorm = 51.2807, GNorm = 2.3051, lr_0 = 6.7105e-04
Loss = 4.7428e-01, PNorm = 51.2969, GNorm = 1.2923, lr_0 = 6.7059e-04
Loss = 4.2930e-01, PNorm = 51.3151, GNorm = 1.2182, lr_0 = 6.7013e-04
Loss = 5.0623e-01, PNorm = 51.3305, GNorm = 2.0653, lr_0 = 6.6967e-04
Loss = 4.4650e-01, PNorm = 51.3450, GNorm = 1.0831, lr_0 = 6.6921e-04
Loss = 4.0197e-01, PNorm = 51.3605, GNorm = 1.0488, lr_0 = 6.6876e-04
Loss = 4.9691e-01, PNorm = 51.3762, GNorm = 1.8644, lr_0 = 6.6830e-04
Loss = 4.5977e-01, PNorm = 51.3924, GNorm = 1.3704, lr_0 = 6.6784e-04
Loss = 4.4535e-01, PNorm = 51.4136, GNorm = 1.5711, lr_0 = 6.6738e-04
Loss = 4.4598e-01, PNorm = 51.4197, GNorm = 1.6550, lr_0 = 6.6693e-04
Loss = 4.2078e-01, PNorm = 51.4371, GNorm = 0.8423, lr_0 = 6.6647e-04
Loss = 4.2419e-01, PNorm = 51.4586, GNorm = 1.5750, lr_0 = 6.6601e-04
Loss = 4.4055e-01, PNorm = 51.4759, GNorm = 0.8813, lr_0 = 6.6556e-04
Loss = 4.4338e-01, PNorm = 51.4936, GNorm = 1.5252, lr_0 = 6.6510e-04
Loss = 5.1631e-01, PNorm = 51.5025, GNorm = 0.9234, lr_0 = 6.6464e-04
Loss = 4.5968e-01, PNorm = 51.5108, GNorm = 0.8564, lr_0 = 6.6419e-04
Loss = 4.1270e-01, PNorm = 51.5308, GNorm = 1.2566, lr_0 = 6.6373e-04
Loss = 4.9493e-01, PNorm = 51.5435, GNorm = 3.4619, lr_0 = 6.6328e-04
Loss = 4.8130e-01, PNorm = 51.5467, GNorm = 1.6127, lr_0 = 6.6282e-04
Validation mae = 0.125924
Epoch 7
Loss = 4.8214e-01, PNorm = 51.5671, GNorm = 2.2589, lr_0 = 6.6237e-04
Loss = 4.8514e-01, PNorm = 51.5793, GNorm = 2.4574, lr_0 = 6.6192e-04
Loss = 4.8718e-01, PNorm = 51.5934, GNorm = 1.1933, lr_0 = 6.6146e-04
Loss = 4.0740e-01, PNorm = 51.6120, GNorm = 1.0492, lr_0 = 6.6101e-04
Loss = 4.4800e-01, PNorm = 51.6280, GNorm = 1.5868, lr_0 = 6.6056e-04
Loss = 4.1688e-01, PNorm = 51.6440, GNorm = 1.2522, lr_0 = 6.6011e-04
Loss = 4.1550e-01, PNorm = 51.6673, GNorm = 1.3186, lr_0 = 6.5965e-04
Loss = 5.4152e-01, PNorm = 51.6843, GNorm = 1.7817, lr_0 = 6.5920e-04
Loss = 4.2072e-01, PNorm = 51.6971, GNorm = 1.0761, lr_0 = 6.5875e-04
Loss = 3.9855e-01, PNorm = 51.7138, GNorm = 1.4322, lr_0 = 6.5830e-04
Loss = 4.1402e-01, PNorm = 51.7272, GNorm = 1.6199, lr_0 = 6.5785e-04
Loss = 4.6864e-01, PNorm = 51.7419, GNorm = 1.0989, lr_0 = 6.5740e-04
Loss = 3.8926e-01, PNorm = 51.7588, GNorm = 0.8062, lr_0 = 6.5695e-04
Loss = 4.3096e-01, PNorm = 51.7690, GNorm = 1.2489, lr_0 = 6.5650e-04
Loss = 4.5533e-01, PNorm = 51.7828, GNorm = 1.0193, lr_0 = 6.5605e-04
Loss = 4.5105e-01, PNorm = 51.7953, GNorm = 1.5591, lr_0 = 6.5560e-04
Loss = 4.1898e-01, PNorm = 51.8042, GNorm = 1.4507, lr_0 = 6.5515e-04
Loss = 3.9564e-01, PNorm = 51.8108, GNorm = 1.4188, lr_0 = 6.5470e-04
Loss = 4.3028e-01, PNorm = 51.8171, GNorm = 1.8131, lr_0 = 6.5425e-04
Loss = 4.4846e-01, PNorm = 51.8299, GNorm = 1.3338, lr_0 = 6.5380e-04
Loss = 4.0453e-01, PNorm = 51.8388, GNorm = 1.2335, lr_0 = 6.5335e-04
Loss = 4.2172e-01, PNorm = 51.8567, GNorm = 1.8671, lr_0 = 6.5291e-04
Loss = 3.6975e-01, PNorm = 51.8733, GNorm = 1.4178, lr_0 = 6.5246e-04
Loss = 4.1447e-01, PNorm = 51.8837, GNorm = 1.2470, lr_0 = 6.5201e-04
Loss = 4.2696e-01, PNorm = 51.8923, GNorm = 0.9141, lr_0 = 6.5157e-04
Loss = 4.4738e-01, PNorm = 51.8985, GNorm = 1.2634, lr_0 = 6.5112e-04
Loss = 4.7826e-01, PNorm = 51.9053, GNorm = 1.1205, lr_0 = 6.5067e-04
Loss = 4.3790e-01, PNorm = 51.9144, GNorm = 1.5129, lr_0 = 6.5023e-04
Loss = 4.2297e-01, PNorm = 51.9344, GNorm = 1.7061, lr_0 = 6.4978e-04
Loss = 5.1045e-01, PNorm = 51.9409, GNorm = 2.3364, lr_0 = 6.4934e-04
Loss = 4.9109e-01, PNorm = 51.9685, GNorm = 1.6488, lr_0 = 6.4889e-04
Loss = 4.1168e-01, PNorm = 51.9889, GNorm = 1.2805, lr_0 = 6.4845e-04
Loss = 5.0235e-01, PNorm = 52.0021, GNorm = 1.4174, lr_0 = 6.4800e-04
Loss = 4.4574e-01, PNorm = 52.0321, GNorm = 1.1710, lr_0 = 6.4756e-04
Loss = 4.5743e-01, PNorm = 52.0546, GNorm = 1.5075, lr_0 = 6.4712e-04
Loss = 4.3240e-01, PNorm = 52.0740, GNorm = 1.4230, lr_0 = 6.4667e-04
Loss = 3.9185e-01, PNorm = 52.0949, GNorm = 1.2448, lr_0 = 6.4623e-04
Loss = 4.3959e-01, PNorm = 52.1102, GNorm = 0.8968, lr_0 = 6.4579e-04
Loss = 5.1366e-01, PNorm = 52.1255, GNorm = 1.6743, lr_0 = 6.4534e-04
Loss = 4.6900e-01, PNorm = 52.1399, GNorm = 1.5044, lr_0 = 6.4490e-04
Loss = 4.1997e-01, PNorm = 52.1551, GNorm = 1.2473, lr_0 = 6.4446e-04
Loss = 4.5779e-01, PNorm = 52.1706, GNorm = 1.7739, lr_0 = 6.4402e-04
Loss = 4.3525e-01, PNorm = 52.1836, GNorm = 1.6752, lr_0 = 6.4358e-04
Loss = 4.5449e-01, PNorm = 52.1944, GNorm = 1.2985, lr_0 = 6.4314e-04
Loss = 4.6254e-01, PNorm = 52.2130, GNorm = 1.4073, lr_0 = 6.4270e-04
Loss = 4.5134e-01, PNorm = 52.2301, GNorm = 1.6615, lr_0 = 6.4226e-04
Loss = 4.0834e-01, PNorm = 52.2489, GNorm = 1.5344, lr_0 = 6.4182e-04
Loss = 4.3283e-01, PNorm = 52.2669, GNorm = 1.1067, lr_0 = 6.4138e-04
Loss = 4.8221e-01, PNorm = 52.2862, GNorm = 1.1255, lr_0 = 6.4094e-04
Loss = 4.8259e-01, PNorm = 52.2976, GNorm = 2.0199, lr_0 = 6.4050e-04
Loss = 4.7208e-01, PNorm = 52.3044, GNorm = 1.2076, lr_0 = 6.4006e-04
Loss = 4.2367e-01, PNorm = 52.3202, GNorm = 1.1537, lr_0 = 6.3962e-04
Loss = 4.6123e-01, PNorm = 52.3332, GNorm = 1.2214, lr_0 = 6.3918e-04
Loss = 4.4008e-01, PNorm = 52.3549, GNorm = 1.5217, lr_0 = 6.3874e-04
Loss = 4.1926e-01, PNorm = 52.3705, GNorm = 1.4692, lr_0 = 6.3831e-04
Loss = 4.0088e-01, PNorm = 52.3797, GNorm = 1.5094, lr_0 = 6.3787e-04
Loss = 4.8762e-01, PNorm = 52.3890, GNorm = 1.2573, lr_0 = 6.3743e-04
Loss = 4.7216e-01, PNorm = 52.3964, GNorm = 1.2245, lr_0 = 6.3700e-04
Loss = 4.5901e-01, PNorm = 52.4094, GNorm = 1.4119, lr_0 = 6.3656e-04
Loss = 4.3975e-01, PNorm = 52.4148, GNorm = 1.3893, lr_0 = 6.3612e-04
Loss = 4.2966e-01, PNorm = 52.4284, GNorm = 1.6246, lr_0 = 6.3569e-04
Loss = 4.4848e-01, PNorm = 52.4394, GNorm = 1.4669, lr_0 = 6.3525e-04
Loss = 4.5588e-01, PNorm = 52.4547, GNorm = 1.0808, lr_0 = 6.3482e-04
Loss = 4.4967e-01, PNorm = 52.4726, GNorm = 1.5425, lr_0 = 6.3438e-04
Loss = 4.8211e-01, PNorm = 52.4884, GNorm = 1.6907, lr_0 = 6.3395e-04
Loss = 4.8376e-01, PNorm = 52.5101, GNorm = 1.8128, lr_0 = 6.3351e-04
Loss = 4.1622e-01, PNorm = 52.5234, GNorm = 0.9420, lr_0 = 6.3308e-04
Loss = 4.8507e-01, PNorm = 52.5305, GNorm = 1.3581, lr_0 = 6.3265e-04
Loss = 4.1845e-01, PNorm = 52.5416, GNorm = 1.1013, lr_0 = 6.3221e-04
Loss = 4.3377e-01, PNorm = 52.5482, GNorm = 1.1442, lr_0 = 6.3178e-04
Loss = 4.7986e-01, PNorm = 52.5610, GNorm = 1.4028, lr_0 = 6.3135e-04
Loss = 3.8882e-01, PNorm = 52.5717, GNorm = 1.3368, lr_0 = 6.3091e-04
Loss = 4.8432e-01, PNorm = 52.5868, GNorm = 1.8856, lr_0 = 6.3048e-04
Loss = 4.2154e-01, PNorm = 52.6061, GNorm = 1.4398, lr_0 = 6.3005e-04
Loss = 5.0488e-01, PNorm = 52.6219, GNorm = 2.0518, lr_0 = 6.2962e-04
Loss = 5.9759e-01, PNorm = 52.6339, GNorm = 1.0245, lr_0 = 6.2919e-04
Loss = 4.5827e-01, PNorm = 52.6437, GNorm = 1.3619, lr_0 = 6.2876e-04
Loss = 4.1202e-01, PNorm = 52.6598, GNorm = 0.8692, lr_0 = 6.2833e-04
Loss = 4.5731e-01, PNorm = 52.6739, GNorm = 1.6000, lr_0 = 6.2789e-04
Loss = 4.1655e-01, PNorm = 52.6942, GNorm = 1.2404, lr_0 = 6.2746e-04
Loss = 4.2909e-01, PNorm = 52.7113, GNorm = 1.6943, lr_0 = 6.2703e-04
Loss = 4.3926e-01, PNorm = 52.7285, GNorm = 2.7099, lr_0 = 6.2661e-04
Loss = 4.2317e-01, PNorm = 52.7496, GNorm = 1.6352, lr_0 = 6.2618e-04
Loss = 4.2926e-01, PNorm = 52.7669, GNorm = 1.3546, lr_0 = 6.2575e-04
Loss = 4.3574e-01, PNorm = 52.7821, GNorm = 1.2287, lr_0 = 6.2532e-04
Loss = 4.7206e-01, PNorm = 52.7959, GNorm = 1.2257, lr_0 = 6.2489e-04
Loss = 3.9201e-01, PNorm = 52.8047, GNorm = 1.4142, lr_0 = 6.2446e-04
Loss = 4.2359e-01, PNorm = 52.8166, GNorm = 1.4438, lr_0 = 6.2403e-04
Loss = 4.2863e-01, PNorm = 52.8295, GNorm = 1.2812, lr_0 = 6.2361e-04
Loss = 5.3911e-01, PNorm = 52.8369, GNorm = 1.0825, lr_0 = 6.2318e-04
Loss = 3.4854e-01, PNorm = 52.8465, GNorm = 0.8287, lr_0 = 6.2275e-04
Loss = 4.0839e-01, PNorm = 52.8558, GNorm = 1.7448, lr_0 = 6.2233e-04
Loss = 4.2483e-01, PNorm = 52.8657, GNorm = 1.5196, lr_0 = 6.2190e-04
Loss = 4.3188e-01, PNorm = 52.8739, GNorm = 1.0879, lr_0 = 6.2147e-04
Loss = 4.6655e-01, PNorm = 52.8855, GNorm = 1.8315, lr_0 = 6.2105e-04
Loss = 4.9453e-01, PNorm = 52.8998, GNorm = 1.5824, lr_0 = 6.2062e-04
Loss = 4.5353e-01, PNorm = 52.9101, GNorm = 2.1630, lr_0 = 6.2020e-04
Loss = 4.7021e-01, PNorm = 52.9233, GNorm = 1.4761, lr_0 = 6.1977e-04
Loss = 4.9266e-01, PNorm = 52.9394, GNorm = 1.7891, lr_0 = 6.1935e-04
Loss = 4.6373e-01, PNorm = 52.9623, GNorm = 1.2546, lr_0 = 6.1892e-04
Loss = 4.9381e-01, PNorm = 52.9744, GNorm = 1.6560, lr_0 = 6.1850e-04
Loss = 4.8569e-01, PNorm = 52.9800, GNorm = 1.4659, lr_0 = 6.1808e-04
Loss = 4.0322e-01, PNorm = 52.9918, GNorm = 1.3360, lr_0 = 6.1765e-04
Loss = 4.5412e-01, PNorm = 53.0004, GNorm = 1.3739, lr_0 = 6.1723e-04
Loss = 4.5446e-01, PNorm = 53.0144, GNorm = 1.8446, lr_0 = 6.1681e-04
Loss = 4.1133e-01, PNorm = 53.0299, GNorm = 0.9775, lr_0 = 6.1638e-04
Loss = 4.5800e-01, PNorm = 53.0385, GNorm = 1.1689, lr_0 = 6.1596e-04
Loss = 4.6672e-01, PNorm = 53.0501, GNorm = 2.0054, lr_0 = 6.1554e-04
Loss = 4.1458e-01, PNorm = 53.0651, GNorm = 1.2474, lr_0 = 6.1512e-04
Loss = 4.2065e-01, PNorm = 53.0753, GNorm = 0.8559, lr_0 = 6.1470e-04
Loss = 4.1789e-01, PNorm = 53.0932, GNorm = 0.9351, lr_0 = 6.1428e-04
Loss = 4.2074e-01, PNorm = 53.1040, GNorm = 1.2782, lr_0 = 6.1385e-04
Loss = 4.1363e-01, PNorm = 53.1138, GNorm = 1.8644, lr_0 = 6.1343e-04
Loss = 4.0823e-01, PNorm = 53.1145, GNorm = 1.8991, lr_0 = 6.1301e-04
Loss = 4.0228e-01, PNorm = 53.1260, GNorm = 1.2911, lr_0 = 6.1259e-04
Loss = 3.9461e-01, PNorm = 53.1394, GNorm = 0.8399, lr_0 = 6.1217e-04
Loss = 4.4505e-01, PNorm = 53.1493, GNorm = 1.1993, lr_0 = 6.1175e-04
Loss = 4.1233e-01, PNorm = 53.1572, GNorm = 1.6871, lr_0 = 6.1134e-04
Loss = 4.4378e-01, PNorm = 53.1694, GNorm = 0.8243, lr_0 = 6.1092e-04
Loss = 4.6351e-01, PNorm = 53.1843, GNorm = 1.1268, lr_0 = 6.1050e-04
Validation mae = 0.117026
Epoch 8
Loss = 4.7445e-01, PNorm = 53.1980, GNorm = 1.3451, lr_0 = 6.1008e-04
Loss = 4.0035e-01, PNorm = 53.2146, GNorm = 0.9665, lr_0 = 6.0966e-04
Loss = 4.7073e-01, PNorm = 53.2258, GNorm = 2.0943, lr_0 = 6.0924e-04
Loss = 4.2342e-01, PNorm = 53.2363, GNorm = 1.4054, lr_0 = 6.0883e-04
Loss = 4.3933e-01, PNorm = 53.2524, GNorm = 1.3469, lr_0 = 6.0841e-04
Loss = 4.8122e-01, PNorm = 53.2689, GNorm = 1.2828, lr_0 = 6.0799e-04
Loss = 4.1390e-01, PNorm = 53.2788, GNorm = 1.3840, lr_0 = 6.0758e-04
Loss = 4.0126e-01, PNorm = 53.2860, GNorm = 0.9916, lr_0 = 6.0716e-04
Loss = 3.8287e-01, PNorm = 53.2976, GNorm = 1.5317, lr_0 = 6.0674e-04
Loss = 4.5381e-01, PNorm = 53.3083, GNorm = 1.1674, lr_0 = 6.0633e-04
Loss = 4.1454e-01, PNorm = 53.3208, GNorm = 1.1834, lr_0 = 6.0591e-04
Loss = 4.8012e-01, PNorm = 53.3277, GNorm = 1.2076, lr_0 = 6.0550e-04
Loss = 4.5207e-01, PNorm = 53.3395, GNorm = 0.9791, lr_0 = 6.0508e-04
Loss = 4.5337e-01, PNorm = 53.3584, GNorm = 1.3610, lr_0 = 6.0467e-04
Loss = 4.5541e-01, PNorm = 53.3724, GNorm = 1.8048, lr_0 = 6.0425e-04
Loss = 4.5531e-01, PNorm = 53.3775, GNorm = 1.2821, lr_0 = 6.0384e-04
Loss = 3.7986e-01, PNorm = 53.3894, GNorm = 1.1652, lr_0 = 6.0343e-04
Loss = 3.6333e-01, PNorm = 53.4013, GNorm = 1.4441, lr_0 = 6.0301e-04
Loss = 4.1178e-01, PNorm = 53.4096, GNorm = 1.0351, lr_0 = 6.0260e-04
Loss = 4.2147e-01, PNorm = 53.4201, GNorm = 1.3009, lr_0 = 6.0219e-04
Loss = 4.1256e-01, PNorm = 53.4345, GNorm = 1.3353, lr_0 = 6.0178e-04
Loss = 5.0419e-01, PNorm = 53.4460, GNorm = 2.2243, lr_0 = 6.0136e-04
Loss = 4.7071e-01, PNorm = 53.4575, GNorm = 1.3588, lr_0 = 6.0095e-04
Loss = 4.4282e-01, PNorm = 53.4675, GNorm = 1.1120, lr_0 = 6.0054e-04
Loss = 4.5644e-01, PNorm = 53.4869, GNorm = 2.5569, lr_0 = 6.0013e-04
Loss = 4.5433e-01, PNorm = 53.4974, GNorm = 1.9468, lr_0 = 5.9972e-04
Loss = 4.3574e-01, PNorm = 53.5155, GNorm = 1.5030, lr_0 = 5.9931e-04
Loss = 4.7708e-01, PNorm = 53.5264, GNorm = 1.4191, lr_0 = 5.9890e-04
Loss = 4.1507e-01, PNorm = 53.5449, GNorm = 0.9429, lr_0 = 5.9849e-04
Loss = 4.2635e-01, PNorm = 53.5602, GNorm = 1.3646, lr_0 = 5.9808e-04
Loss = 4.8125e-01, PNorm = 53.5684, GNorm = 1.3480, lr_0 = 5.9767e-04
Loss = 4.4088e-01, PNorm = 53.5773, GNorm = 1.3577, lr_0 = 5.9726e-04
Loss = 4.5675e-01, PNorm = 53.5884, GNorm = 1.0974, lr_0 = 5.9685e-04
Loss = 4.1913e-01, PNorm = 53.6007, GNorm = 2.2547, lr_0 = 5.9644e-04
Loss = 4.6766e-01, PNorm = 53.6122, GNorm = 1.8392, lr_0 = 5.9603e-04
Loss = 3.7853e-01, PNorm = 53.6301, GNorm = 1.1480, lr_0 = 5.9562e-04
Loss = 4.3198e-01, PNorm = 53.6436, GNorm = 1.7098, lr_0 = 5.9521e-04
Loss = 4.2196e-01, PNorm = 53.6549, GNorm = 1.3939, lr_0 = 5.9481e-04
Loss = 4.8027e-01, PNorm = 53.6730, GNorm = 1.6218, lr_0 = 5.9440e-04
Loss = 3.9967e-01, PNorm = 53.6826, GNorm = 1.3567, lr_0 = 5.9399e-04
Loss = 4.6774e-01, PNorm = 53.6894, GNorm = 1.2506, lr_0 = 5.9358e-04
Loss = 4.3713e-01, PNorm = 53.6981, GNorm = 1.3151, lr_0 = 5.9318e-04
Loss = 4.3796e-01, PNorm = 53.7081, GNorm = 1.3346, lr_0 = 5.9277e-04
Loss = 4.2425e-01, PNorm = 53.7122, GNorm = 2.1755, lr_0 = 5.9236e-04
Loss = 4.0903e-01, PNorm = 53.7199, GNorm = 2.2624, lr_0 = 5.9196e-04
Loss = 4.0239e-01, PNorm = 53.7349, GNorm = 1.5871, lr_0 = 5.9155e-04
Loss = 3.6243e-01, PNorm = 53.7449, GNorm = 0.9887, lr_0 = 5.9115e-04
Loss = 4.7872e-01, PNorm = 53.7591, GNorm = 1.4309, lr_0 = 5.9074e-04
Loss = 4.5178e-01, PNorm = 53.7742, GNorm = 1.6138, lr_0 = 5.9034e-04
Loss = 4.8326e-01, PNorm = 53.7818, GNorm = 2.3508, lr_0 = 5.8993e-04
Loss = 4.0843e-01, PNorm = 53.7939, GNorm = 1.7468, lr_0 = 5.8953e-04
Loss = 3.7098e-01, PNorm = 53.8036, GNorm = 1.3522, lr_0 = 5.8913e-04
Loss = 3.9932e-01, PNorm = 53.8131, GNorm = 1.6607, lr_0 = 5.8872e-04
Loss = 4.2766e-01, PNorm = 53.8230, GNorm = 1.2393, lr_0 = 5.8832e-04
Loss = 4.0600e-01, PNorm = 53.8361, GNorm = 1.2781, lr_0 = 5.8792e-04
Loss = 3.9478e-01, PNorm = 53.8481, GNorm = 1.4273, lr_0 = 5.8751e-04
Loss = 4.8114e-01, PNorm = 53.8605, GNorm = 1.2581, lr_0 = 5.8711e-04
Loss = 3.8051e-01, PNorm = 53.8760, GNorm = 1.3544, lr_0 = 5.8671e-04
Loss = 4.0856e-01, PNorm = 53.8852, GNorm = 1.6998, lr_0 = 5.8631e-04
Loss = 4.3561e-01, PNorm = 53.8985, GNorm = 1.8145, lr_0 = 5.8591e-04
Loss = 4.2278e-01, PNorm = 53.9060, GNorm = 1.4645, lr_0 = 5.8550e-04
Loss = 4.6232e-01, PNorm = 53.9174, GNorm = 1.4243, lr_0 = 5.8510e-04
Loss = 4.8672e-01, PNorm = 53.9279, GNorm = 1.5301, lr_0 = 5.8470e-04
Loss = 4.8055e-01, PNorm = 53.9393, GNorm = 1.2634, lr_0 = 5.8430e-04
Loss = 4.6858e-01, PNorm = 53.9538, GNorm = 1.4452, lr_0 = 5.8390e-04
Loss = 4.0617e-01, PNorm = 53.9739, GNorm = 1.5694, lr_0 = 5.8350e-04
Loss = 4.5820e-01, PNorm = 53.9885, GNorm = 1.2388, lr_0 = 5.8310e-04
Loss = 4.6620e-01, PNorm = 54.0023, GNorm = 1.9166, lr_0 = 5.8270e-04
Loss = 4.1085e-01, PNorm = 54.0137, GNorm = 1.6686, lr_0 = 5.8230e-04
Loss = 4.1288e-01, PNorm = 54.0167, GNorm = 1.5869, lr_0 = 5.8190e-04
Loss = 4.6413e-01, PNorm = 54.0292, GNorm = 1.3150, lr_0 = 5.8151e-04
Loss = 4.3609e-01, PNorm = 54.0419, GNorm = 1.2373, lr_0 = 5.8111e-04
Loss = 4.9391e-01, PNorm = 54.0558, GNorm = 1.2906, lr_0 = 5.8071e-04
Loss = 4.4390e-01, PNorm = 54.0679, GNorm = 1.8661, lr_0 = 5.8031e-04
Loss = 4.6756e-01, PNorm = 54.0846, GNorm = 2.0971, lr_0 = 5.7991e-04
Loss = 4.4792e-01, PNorm = 54.0880, GNorm = 1.3627, lr_0 = 5.7952e-04
Loss = 4.4941e-01, PNorm = 54.0998, GNorm = 1.7607, lr_0 = 5.7912e-04
Loss = 4.1508e-01, PNorm = 54.1130, GNorm = 1.7677, lr_0 = 5.7872e-04
Loss = 4.0334e-01, PNorm = 54.1286, GNorm = 1.1658, lr_0 = 5.7833e-04
Loss = 4.9430e-01, PNorm = 54.1365, GNorm = 1.4226, lr_0 = 5.7793e-04
Loss = 4.6962e-01, PNorm = 54.1509, GNorm = 1.5299, lr_0 = 5.7753e-04
Loss = 4.3094e-01, PNorm = 54.1582, GNorm = 1.4795, lr_0 = 5.7714e-04
Loss = 4.3492e-01, PNorm = 54.1697, GNorm = 1.3273, lr_0 = 5.7674e-04
Loss = 4.8903e-01, PNorm = 54.1857, GNorm = 1.3959, lr_0 = 5.7635e-04
Loss = 4.6285e-01, PNorm = 54.1954, GNorm = 1.0328, lr_0 = 5.7595e-04
Loss = 4.4812e-01, PNorm = 54.2145, GNorm = 4.0253, lr_0 = 5.7556e-04
Loss = 4.2951e-01, PNorm = 54.2195, GNorm = 1.8255, lr_0 = 5.7516e-04
Loss = 4.2603e-01, PNorm = 54.2360, GNorm = 1.4959, lr_0 = 5.7477e-04
Loss = 4.3801e-01, PNorm = 54.2507, GNorm = 1.5644, lr_0 = 5.7438e-04
Loss = 4.6619e-01, PNorm = 54.2619, GNorm = 0.9921, lr_0 = 5.7398e-04
Loss = 4.3477e-01, PNorm = 54.2694, GNorm = 1.1556, lr_0 = 5.7359e-04
Loss = 4.5586e-01, PNorm = 54.2700, GNorm = 1.2350, lr_0 = 5.7320e-04
Loss = 3.6017e-01, PNorm = 54.2771, GNorm = 1.2017, lr_0 = 5.7280e-04
Loss = 4.1363e-01, PNorm = 54.2863, GNorm = 1.6277, lr_0 = 5.7241e-04
Loss = 3.8311e-01, PNorm = 54.2936, GNorm = 0.8219, lr_0 = 5.7202e-04
Loss = 4.6575e-01, PNorm = 54.3074, GNorm = 1.5088, lr_0 = 5.7163e-04
Loss = 4.4526e-01, PNorm = 54.3180, GNorm = 1.5022, lr_0 = 5.7124e-04
Loss = 4.5069e-01, PNorm = 54.3267, GNorm = 1.6411, lr_0 = 5.7084e-04
Loss = 4.1468e-01, PNorm = 54.3391, GNorm = 1.5554, lr_0 = 5.7045e-04
Loss = 4.2995e-01, PNorm = 54.3510, GNorm = 1.4112, lr_0 = 5.7006e-04
Loss = 4.6322e-01, PNorm = 54.3648, GNorm = 1.2187, lr_0 = 5.6967e-04
Loss = 4.2069e-01, PNorm = 54.3787, GNorm = 1.5488, lr_0 = 5.6928e-04
Loss = 3.8818e-01, PNorm = 54.3882, GNorm = 1.0355, lr_0 = 5.6889e-04
Loss = 3.8129e-01, PNorm = 54.3939, GNorm = 1.1131, lr_0 = 5.6850e-04
Loss = 4.1913e-01, PNorm = 54.4040, GNorm = 1.3412, lr_0 = 5.6811e-04
Loss = 3.9884e-01, PNorm = 54.4189, GNorm = 0.8691, lr_0 = 5.6772e-04
Loss = 4.0087e-01, PNorm = 54.4252, GNorm = 1.5503, lr_0 = 5.6733e-04
Loss = 4.2834e-01, PNorm = 54.4266, GNorm = 1.2516, lr_0 = 5.6695e-04
Loss = 4.4278e-01, PNorm = 54.4332, GNorm = 1.9588, lr_0 = 5.6656e-04
Loss = 4.0637e-01, PNorm = 54.4428, GNorm = 1.4383, lr_0 = 5.6617e-04
Loss = 4.1892e-01, PNorm = 54.4560, GNorm = 2.0136, lr_0 = 5.6578e-04
Loss = 3.7534e-01, PNorm = 54.4696, GNorm = 1.4822, lr_0 = 5.6539e-04
Loss = 4.4413e-01, PNorm = 54.4814, GNorm = 2.2385, lr_0 = 5.6501e-04
Loss = 4.0939e-01, PNorm = 54.4888, GNorm = 1.2983, lr_0 = 5.6462e-04
Loss = 4.1146e-01, PNorm = 54.5013, GNorm = 2.2727, lr_0 = 5.6423e-04
Loss = 4.1890e-01, PNorm = 54.5031, GNorm = 2.1605, lr_0 = 5.6385e-04
Loss = 4.3685e-01, PNorm = 54.5120, GNorm = 1.0484, lr_0 = 5.6346e-04
Loss = 4.3581e-01, PNorm = 54.5238, GNorm = 1.2511, lr_0 = 5.6307e-04
Loss = 4.5029e-01, PNorm = 54.5322, GNorm = 1.7011, lr_0 = 5.6269e-04
Loss = 4.1297e-01, PNorm = 54.5370, GNorm = 1.6366, lr_0 = 5.6230e-04
Validation mae = 0.117928
Epoch 9
Loss = 4.4234e-01, PNorm = 54.5527, GNorm = 1.7040, lr_0 = 5.6192e-04
Loss = 3.9083e-01, PNorm = 54.5664, GNorm = 1.1698, lr_0 = 5.6153e-04
Loss = 4.4878e-01, PNorm = 54.5755, GNorm = 1.3723, lr_0 = 5.6115e-04
Loss = 3.8213e-01, PNorm = 54.5816, GNorm = 0.9592, lr_0 = 5.6076e-04
Loss = 4.4344e-01, PNorm = 54.5876, GNorm = 1.7756, lr_0 = 5.6038e-04
Loss = 4.2977e-01, PNorm = 54.6017, GNorm = 1.2330, lr_0 = 5.6000e-04
Loss = 4.7816e-01, PNorm = 54.6184, GNorm = 1.2640, lr_0 = 5.5961e-04
Loss = 4.1419e-01, PNorm = 54.6292, GNorm = 1.4028, lr_0 = 5.5923e-04
Loss = 3.4101e-01, PNorm = 54.6401, GNorm = 1.6998, lr_0 = 5.5885e-04
Loss = 4.6612e-01, PNorm = 54.6546, GNorm = 1.0069, lr_0 = 5.5846e-04
Loss = 4.2490e-01, PNorm = 54.6676, GNorm = 1.3894, lr_0 = 5.5808e-04
Loss = 4.7446e-01, PNorm = 54.6797, GNorm = 2.2149, lr_0 = 5.5770e-04
Loss = 4.3671e-01, PNorm = 54.6929, GNorm = 1.1022, lr_0 = 5.5732e-04
Loss = 4.3571e-01, PNorm = 54.7017, GNorm = 1.1136, lr_0 = 5.5693e-04
Loss = 4.4284e-01, PNorm = 54.7088, GNorm = 1.0867, lr_0 = 5.5655e-04
Loss = 4.7392e-01, PNorm = 54.7189, GNorm = 1.4124, lr_0 = 5.5617e-04
Loss = 4.2776e-01, PNorm = 54.7322, GNorm = 1.2639, lr_0 = 5.5579e-04
Loss = 4.0702e-01, PNorm = 54.7421, GNorm = 1.2755, lr_0 = 5.5541e-04
Loss = 4.7620e-01, PNorm = 54.7545, GNorm = 1.0717, lr_0 = 5.5503e-04
Loss = 4.0750e-01, PNorm = 54.7614, GNorm = 0.9436, lr_0 = 5.5465e-04
Loss = 3.5391e-01, PNorm = 54.7749, GNorm = 1.2593, lr_0 = 5.5427e-04
Loss = 4.2031e-01, PNorm = 54.7896, GNorm = 1.9514, lr_0 = 5.5389e-04
Loss = 5.5094e-01, PNorm = 54.8059, GNorm = 2.0204, lr_0 = 5.5351e-04
Loss = 4.5167e-01, PNorm = 54.8217, GNorm = 2.5192, lr_0 = 5.5313e-04
Loss = 4.1380e-01, PNorm = 54.8332, GNorm = 1.6016, lr_0 = 5.5275e-04
Loss = 4.2452e-01, PNorm = 54.8430, GNorm = 1.0727, lr_0 = 5.5237e-04
Loss = 4.1168e-01, PNorm = 54.8527, GNorm = 1.2692, lr_0 = 5.5199e-04
Loss = 4.4122e-01, PNorm = 54.8652, GNorm = 1.3282, lr_0 = 5.5162e-04
Loss = 3.9876e-01, PNorm = 54.8775, GNorm = 0.8849, lr_0 = 5.5124e-04
Loss = 4.5308e-01, PNorm = 54.8866, GNorm = 1.8199, lr_0 = 5.5086e-04
Loss = 4.1027e-01, PNorm = 54.8964, GNorm = 1.0590, lr_0 = 5.5048e-04
Loss = 4.3887e-01, PNorm = 54.9093, GNorm = 2.0216, lr_0 = 5.5011e-04
Loss = 4.1398e-01, PNorm = 54.9181, GNorm = 1.2385, lr_0 = 5.4973e-04
Loss = 4.5604e-01, PNorm = 54.9293, GNorm = 2.2876, lr_0 = 5.4935e-04
Loss = 3.9918e-01, PNorm = 54.9424, GNorm = 1.4636, lr_0 = 5.4898e-04
Loss = 4.2895e-01, PNorm = 54.9556, GNorm = 1.3742, lr_0 = 5.4860e-04
Loss = 4.2975e-01, PNorm = 54.9652, GNorm = 1.4429, lr_0 = 5.4822e-04
Loss = 4.6256e-01, PNorm = 54.9844, GNorm = 1.8939, lr_0 = 5.4785e-04
Loss = 4.3119e-01, PNorm = 54.9995, GNorm = 2.4445, lr_0 = 5.4747e-04
Loss = 4.2757e-01, PNorm = 55.0125, GNorm = 2.3579, lr_0 = 5.4710e-04
Loss = 4.3591e-01, PNorm = 55.0224, GNorm = 2.2116, lr_0 = 5.4672e-04
Loss = 4.3519e-01, PNorm = 55.0349, GNorm = 1.4572, lr_0 = 5.4635e-04
Loss = 4.4360e-01, PNorm = 55.0460, GNorm = 0.8856, lr_0 = 5.4597e-04
Loss = 3.8681e-01, PNorm = 55.0530, GNorm = 1.2694, lr_0 = 5.4560e-04
Loss = 4.2642e-01, PNorm = 55.0623, GNorm = 1.6852, lr_0 = 5.4523e-04
Loss = 4.0041e-01, PNorm = 55.0664, GNorm = 1.3019, lr_0 = 5.4485e-04
Loss = 3.8805e-01, PNorm = 55.0816, GNorm = 1.7231, lr_0 = 5.4448e-04
Loss = 5.2034e-01, PNorm = 55.0883, GNorm = 1.6215, lr_0 = 5.4411e-04
Loss = 3.7477e-01, PNorm = 55.0968, GNorm = 1.5608, lr_0 = 5.4373e-04
Loss = 4.1161e-01, PNorm = 55.1039, GNorm = 1.0585, lr_0 = 5.4336e-04
Loss = 4.1208e-01, PNorm = 55.1154, GNorm = 1.1034, lr_0 = 5.4299e-04
Loss = 4.4188e-01, PNorm = 55.1268, GNorm = 1.3457, lr_0 = 5.4262e-04
Loss = 4.6714e-01, PNorm = 55.1334, GNorm = 1.3822, lr_0 = 5.4225e-04
Loss = 4.0824e-01, PNorm = 55.1464, GNorm = 1.1166, lr_0 = 5.4187e-04
Loss = 3.8101e-01, PNorm = 55.1526, GNorm = 1.4197, lr_0 = 5.4150e-04
Loss = 3.7033e-01, PNorm = 55.1640, GNorm = 1.1719, lr_0 = 5.4113e-04
Loss = 3.9124e-01, PNorm = 55.1723, GNorm = 1.3083, lr_0 = 5.4076e-04
Loss = 3.8546e-01, PNorm = 55.1733, GNorm = 1.3244, lr_0 = 5.4039e-04
Loss = 4.4310e-01, PNorm = 55.1841, GNorm = 1.2853, lr_0 = 5.4002e-04
Loss = 4.4451e-01, PNorm = 55.1989, GNorm = 1.2490, lr_0 = 5.3965e-04
Loss = 3.7014e-01, PNorm = 55.2083, GNorm = 2.2518, lr_0 = 5.3928e-04
Loss = 4.0648e-01, PNorm = 55.2142, GNorm = 2.1136, lr_0 = 5.3891e-04
Loss = 4.8382e-01, PNorm = 55.2254, GNorm = 2.1384, lr_0 = 5.3854e-04
Loss = 5.0073e-01, PNorm = 55.2365, GNorm = 1.2531, lr_0 = 5.3817e-04
Loss = 4.4664e-01, PNorm = 55.2474, GNorm = 1.4068, lr_0 = 5.3781e-04
Loss = 4.2046e-01, PNorm = 55.2616, GNorm = 1.6111, lr_0 = 5.3744e-04
Loss = 4.1695e-01, PNorm = 55.2713, GNorm = 2.1054, lr_0 = 5.3707e-04
Loss = 4.1131e-01, PNorm = 55.2808, GNorm = 1.4046, lr_0 = 5.3670e-04
Loss = 3.9984e-01, PNorm = 55.2895, GNorm = 1.1906, lr_0 = 5.3633e-04
Loss = 4.8235e-01, PNorm = 55.2963, GNorm = 3.0616, lr_0 = 5.3597e-04
Loss = 4.0242e-01, PNorm = 55.3057, GNorm = 1.0462, lr_0 = 5.3560e-04
Loss = 4.7501e-01, PNorm = 55.3081, GNorm = 1.3950, lr_0 = 5.3523e-04
Loss = 4.5071e-01, PNorm = 55.3219, GNorm = 1.5044, lr_0 = 5.3486e-04
Loss = 4.5230e-01, PNorm = 55.3313, GNorm = 1.2229, lr_0 = 5.3450e-04
Loss = 3.9173e-01, PNorm = 55.3447, GNorm = 1.1624, lr_0 = 5.3413e-04
Loss = 3.8364e-01, PNorm = 55.3606, GNorm = 1.2231, lr_0 = 5.3377e-04
Loss = 3.8588e-01, PNorm = 55.3751, GNorm = 0.8929, lr_0 = 5.3340e-04
Loss = 4.0489e-01, PNorm = 55.3876, GNorm = 1.4202, lr_0 = 5.3304e-04
Loss = 4.4844e-01, PNorm = 55.3994, GNorm = 1.1619, lr_0 = 5.3267e-04
Loss = 4.1380e-01, PNorm = 55.4056, GNorm = 1.0474, lr_0 = 5.3231e-04
Loss = 4.3594e-01, PNorm = 55.4141, GNorm = 1.1532, lr_0 = 5.3194e-04
Loss = 4.7741e-01, PNorm = 55.4264, GNorm = 1.1343, lr_0 = 5.3158e-04
Loss = 4.7039e-01, PNorm = 55.4401, GNorm = 1.2467, lr_0 = 5.3121e-04
Loss = 4.2019e-01, PNorm = 55.4504, GNorm = 1.1244, lr_0 = 5.3085e-04
Loss = 4.5087e-01, PNorm = 55.4609, GNorm = 1.9680, lr_0 = 5.3048e-04
Loss = 4.1000e-01, PNorm = 55.4672, GNorm = 1.1711, lr_0 = 5.3012e-04
Loss = 4.8432e-01, PNorm = 55.4841, GNorm = 1.1451, lr_0 = 5.2976e-04
Loss = 4.2112e-01, PNorm = 55.4880, GNorm = 1.2202, lr_0 = 5.2939e-04
Loss = 4.2886e-01, PNorm = 55.4978, GNorm = 1.1089, lr_0 = 5.2903e-04
Loss = 4.5734e-01, PNorm = 55.5141, GNorm = 1.7323, lr_0 = 5.2867e-04
Loss = 4.3842e-01, PNorm = 55.5312, GNorm = 1.5218, lr_0 = 5.2831e-04
Loss = 4.1990e-01, PNorm = 55.5387, GNorm = 1.4742, lr_0 = 5.2795e-04
Loss = 4.2304e-01, PNorm = 55.5573, GNorm = 1.6135, lr_0 = 5.2758e-04
Loss = 4.1835e-01, PNorm = 55.5714, GNorm = 1.5335, lr_0 = 5.2722e-04
Loss = 4.0024e-01, PNorm = 55.5764, GNorm = 1.3570, lr_0 = 5.2686e-04
Loss = 4.0321e-01, PNorm = 55.5803, GNorm = 0.9656, lr_0 = 5.2650e-04
Loss = 4.1105e-01, PNorm = 55.5877, GNorm = 1.4443, lr_0 = 5.2614e-04
Loss = 3.7787e-01, PNorm = 55.5933, GNorm = 1.0422, lr_0 = 5.2578e-04
Loss = 4.0152e-01, PNorm = 55.6039, GNorm = 1.4573, lr_0 = 5.2542e-04
Loss = 4.2210e-01, PNorm = 55.6137, GNorm = 0.9730, lr_0 = 5.2506e-04
Loss = 3.7472e-01, PNorm = 55.6182, GNorm = 1.3155, lr_0 = 5.2470e-04
Loss = 4.5330e-01, PNorm = 55.6256, GNorm = 1.0330, lr_0 = 5.2434e-04
Loss = 3.7930e-01, PNorm = 55.6318, GNorm = 1.6336, lr_0 = 5.2398e-04
Loss = 4.2317e-01, PNorm = 55.6428, GNorm = 1.7075, lr_0 = 5.2362e-04
Loss = 3.4574e-01, PNorm = 55.6497, GNorm = 1.3608, lr_0 = 5.2326e-04
Loss = 4.7652e-01, PNorm = 55.6591, GNorm = 1.6310, lr_0 = 5.2290e-04
Loss = 4.0080e-01, PNorm = 55.6676, GNorm = 1.3506, lr_0 = 5.2255e-04
Loss = 4.2360e-01, PNorm = 55.6695, GNorm = 1.1999, lr_0 = 5.2219e-04
Loss = 4.3181e-01, PNorm = 55.6854, GNorm = 1.0811, lr_0 = 5.2183e-04
Loss = 4.1433e-01, PNorm = 55.6937, GNorm = 1.1151, lr_0 = 5.2147e-04
Loss = 3.9850e-01, PNorm = 55.7032, GNorm = 1.4267, lr_0 = 5.2112e-04
Loss = 4.0278e-01, PNorm = 55.7158, GNorm = 1.2024, lr_0 = 5.2076e-04
Loss = 4.3595e-01, PNorm = 55.7275, GNorm = 1.0997, lr_0 = 5.2040e-04
Loss = 4.3739e-01, PNorm = 55.7338, GNorm = 1.3518, lr_0 = 5.2005e-04
Loss = 4.4920e-01, PNorm = 55.7407, GNorm = 1.0541, lr_0 = 5.1969e-04
Loss = 3.8921e-01, PNorm = 55.7525, GNorm = 1.3219, lr_0 = 5.1933e-04
Loss = 4.2562e-01, PNorm = 55.7620, GNorm = 1.4528, lr_0 = 5.1898e-04
Loss = 4.5002e-01, PNorm = 55.7743, GNorm = 1.2929, lr_0 = 5.1862e-04
Loss = 4.0187e-01, PNorm = 55.7853, GNorm = 1.3361, lr_0 = 5.1827e-04
Loss = 4.1573e-01, PNorm = 55.7909, GNorm = 1.6050, lr_0 = 5.1791e-04
Validation mae = 0.115685
Epoch 10
Loss = 4.1233e-01, PNorm = 55.7964, GNorm = 1.5348, lr_0 = 5.1756e-04
Loss = 4.4376e-01, PNorm = 55.8046, GNorm = 1.1053, lr_0 = 5.1720e-04
Loss = 4.4441e-01, PNorm = 55.8104, GNorm = 1.0354, lr_0 = 5.1685e-04
Loss = 3.9364e-01, PNorm = 55.8198, GNorm = 1.1537, lr_0 = 5.1649e-04
Loss = 4.1696e-01, PNorm = 55.8269, GNorm = 1.4366, lr_0 = 5.1614e-04
Loss = 4.0286e-01, PNorm = 55.8339, GNorm = 1.6927, lr_0 = 5.1579e-04
Loss = 3.3587e-01, PNorm = 55.8443, GNorm = 0.9572, lr_0 = 5.1543e-04
Loss = 3.7939e-01, PNorm = 55.8523, GNorm = 1.5148, lr_0 = 5.1508e-04
Loss = 3.9589e-01, PNorm = 55.8636, GNorm = 1.0823, lr_0 = 5.1473e-04
Loss = 3.9395e-01, PNorm = 55.8739, GNorm = 1.1487, lr_0 = 5.1437e-04
Loss = 4.1571e-01, PNorm = 55.8843, GNorm = 1.5404, lr_0 = 5.1402e-04
Loss = 4.0188e-01, PNorm = 55.8938, GNorm = 2.2190, lr_0 = 5.1367e-04
Loss = 4.2743e-01, PNorm = 55.9050, GNorm = 1.3235, lr_0 = 5.1332e-04
Loss = 4.5692e-01, PNorm = 55.9149, GNorm = 1.3842, lr_0 = 5.1297e-04
Loss = 4.5082e-01, PNorm = 55.9220, GNorm = 2.3219, lr_0 = 5.1262e-04
Loss = 4.2022e-01, PNorm = 55.9284, GNorm = 1.4548, lr_0 = 5.1226e-04
Loss = 4.6105e-01, PNorm = 55.9353, GNorm = 1.8014, lr_0 = 5.1191e-04
Loss = 4.0589e-01, PNorm = 55.9413, GNorm = 1.4014, lr_0 = 5.1156e-04
Loss = 4.7550e-01, PNorm = 55.9532, GNorm = 1.5333, lr_0 = 5.1121e-04
Loss = 3.9421e-01, PNorm = 55.9617, GNorm = 2.0407, lr_0 = 5.1086e-04
Loss = 3.9885e-01, PNorm = 55.9694, GNorm = 1.3464, lr_0 = 5.1051e-04
Loss = 3.8706e-01, PNorm = 55.9765, GNorm = 0.9668, lr_0 = 5.1016e-04
Loss = 4.1272e-01, PNorm = 55.9904, GNorm = 1.1712, lr_0 = 5.0981e-04
Loss = 4.2254e-01, PNorm = 55.9983, GNorm = 1.0080, lr_0 = 5.0946e-04
Loss = 4.0770e-01, PNorm = 56.0061, GNorm = 1.4041, lr_0 = 5.0911e-04
Loss = 4.1177e-01, PNorm = 56.0153, GNorm = 1.0748, lr_0 = 5.0877e-04
Loss = 4.0734e-01, PNorm = 56.0234, GNorm = 1.1762, lr_0 = 5.0842e-04
Loss = 3.5843e-01, PNorm = 56.0337, GNorm = 1.7854, lr_0 = 5.0807e-04
Loss = 3.7443e-01, PNorm = 56.0363, GNorm = 1.5085, lr_0 = 5.0772e-04
Loss = 3.8532e-01, PNorm = 56.0527, GNorm = 2.4397, lr_0 = 5.0737e-04
Loss = 4.0471e-01, PNorm = 56.0622, GNorm = 1.2258, lr_0 = 5.0703e-04
Loss = 3.7629e-01, PNorm = 56.0749, GNorm = 1.3817, lr_0 = 5.0668e-04
Loss = 4.4365e-01, PNorm = 56.0859, GNorm = 1.2622, lr_0 = 5.0633e-04
Loss = 4.2586e-01, PNorm = 56.0950, GNorm = 1.2554, lr_0 = 5.0598e-04
Loss = 3.8035e-01, PNorm = 56.1076, GNorm = 1.4152, lr_0 = 5.0564e-04
Loss = 4.0140e-01, PNorm = 56.1160, GNorm = 1.1326, lr_0 = 5.0529e-04
Loss = 4.0943e-01, PNorm = 56.1280, GNorm = 3.1747, lr_0 = 5.0494e-04
Loss = 3.6754e-01, PNorm = 56.1350, GNorm = 1.2016, lr_0 = 5.0460e-04
Loss = 4.1160e-01, PNorm = 56.1456, GNorm = 1.1814, lr_0 = 5.0425e-04
Loss = 4.0602e-01, PNorm = 56.1509, GNorm = 1.0957, lr_0 = 5.0391e-04
Loss = 4.0435e-01, PNorm = 56.1654, GNorm = 1.1215, lr_0 = 5.0356e-04
Loss = 3.6625e-01, PNorm = 56.1767, GNorm = 1.9406, lr_0 = 5.0322e-04
Loss = 4.1137e-01, PNorm = 56.1869, GNorm = 1.3046, lr_0 = 5.0287e-04
Loss = 5.0163e-01, PNorm = 56.1992, GNorm = 2.7156, lr_0 = 5.0253e-04
Loss = 4.3937e-01, PNorm = 56.2078, GNorm = 1.5260, lr_0 = 5.0218e-04
Loss = 4.5154e-01, PNorm = 56.2112, GNorm = 1.2161, lr_0 = 5.0184e-04
Loss = 4.2108e-01, PNorm = 56.2213, GNorm = 0.8546, lr_0 = 5.0150e-04
Loss = 3.6673e-01, PNorm = 56.2287, GNorm = 0.7686, lr_0 = 5.0115e-04
Loss = 4.1999e-01, PNorm = 56.2341, GNorm = 1.0724, lr_0 = 5.0081e-04
Loss = 3.6467e-01, PNorm = 56.2414, GNorm = 1.2371, lr_0 = 5.0047e-04
Loss = 4.3967e-01, PNorm = 56.2507, GNorm = 1.1516, lr_0 = 5.0012e-04
Loss = 4.5804e-01, PNorm = 56.2577, GNorm = 1.1783, lr_0 = 4.9978e-04
Loss = 4.3349e-01, PNorm = 56.2679, GNorm = 1.0469, lr_0 = 4.9944e-04
Loss = 4.0171e-01, PNorm = 56.2759, GNorm = 1.8542, lr_0 = 4.9910e-04
Loss = 4.4575e-01, PNorm = 56.2827, GNorm = 1.7621, lr_0 = 4.9875e-04
Loss = 4.6640e-01, PNorm = 56.2890, GNorm = 1.5141, lr_0 = 4.9841e-04
Loss = 4.3274e-01, PNorm = 56.2998, GNorm = 1.2278, lr_0 = 4.9807e-04
Loss = 4.6570e-01, PNorm = 56.3072, GNorm = 1.5189, lr_0 = 4.9773e-04
Loss = 4.0239e-01, PNorm = 56.3224, GNorm = 1.4870, lr_0 = 4.9739e-04
Loss = 3.9111e-01, PNorm = 56.3336, GNorm = 1.3604, lr_0 = 4.9705e-04
Loss = 4.9037e-01, PNorm = 56.3469, GNorm = 2.7213, lr_0 = 4.9671e-04
Loss = 3.7801e-01, PNorm = 56.3588, GNorm = 1.6436, lr_0 = 4.9637e-04
Loss = 4.3832e-01, PNorm = 56.3671, GNorm = 1.8189, lr_0 = 4.9603e-04
Loss = 4.0283e-01, PNorm = 56.3722, GNorm = 1.3462, lr_0 = 4.9569e-04
Loss = 4.1735e-01, PNorm = 56.3768, GNorm = 1.5991, lr_0 = 4.9535e-04
Loss = 4.4916e-01, PNorm = 56.3876, GNorm = 2.0354, lr_0 = 4.9501e-04
Loss = 3.9640e-01, PNorm = 56.4005, GNorm = 2.9015, lr_0 = 4.9467e-04
Loss = 4.5859e-01, PNorm = 56.4143, GNorm = 1.4661, lr_0 = 4.9433e-04
Loss = 4.3776e-01, PNorm = 56.4283, GNorm = 2.3987, lr_0 = 4.9399e-04
Loss = 4.1229e-01, PNorm = 56.4376, GNorm = 1.3775, lr_0 = 4.9365e-04
Loss = 4.2360e-01, PNorm = 56.4506, GNorm = 1.6814, lr_0 = 4.9332e-04
Loss = 4.0169e-01, PNorm = 56.4607, GNorm = 2.4237, lr_0 = 4.9298e-04
Loss = 4.4811e-01, PNorm = 56.4699, GNorm = 1.3734, lr_0 = 4.9264e-04
Loss = 4.7931e-01, PNorm = 56.4830, GNorm = 1.3521, lr_0 = 4.9230e-04
Loss = 4.3264e-01, PNorm = 56.4918, GNorm = 1.1434, lr_0 = 4.9197e-04
Loss = 4.2049e-01, PNorm = 56.5027, GNorm = 2.0612, lr_0 = 4.9163e-04
Loss = 4.0925e-01, PNorm = 56.5119, GNorm = 1.2790, lr_0 = 4.9129e-04
Loss = 3.9595e-01, PNorm = 56.5232, GNorm = 1.1697, lr_0 = 4.9095e-04
Loss = 4.1071e-01, PNorm = 56.5316, GNorm = 1.3262, lr_0 = 4.9062e-04
Loss = 4.0025e-01, PNorm = 56.5410, GNorm = 1.2744, lr_0 = 4.9028e-04
Loss = 4.0106e-01, PNorm = 56.5491, GNorm = 1.1406, lr_0 = 4.8995e-04
Loss = 4.8901e-01, PNorm = 56.5609, GNorm = 1.7000, lr_0 = 4.8961e-04
Loss = 4.1501e-01, PNorm = 56.5698, GNorm = 2.6229, lr_0 = 4.8928e-04
Loss = 4.7875e-01, PNorm = 56.5782, GNorm = 2.1144, lr_0 = 4.8894e-04
Loss = 4.2823e-01, PNorm = 56.5858, GNorm = 1.6777, lr_0 = 4.8861e-04
Loss = 4.4221e-01, PNorm = 56.5939, GNorm = 1.0334, lr_0 = 4.8827e-04
Loss = 3.8750e-01, PNorm = 56.6015, GNorm = 1.4302, lr_0 = 4.8794e-04
Loss = 4.1715e-01, PNorm = 56.6038, GNorm = 1.8405, lr_0 = 4.8760e-04
Loss = 4.2887e-01, PNorm = 56.6147, GNorm = 1.4640, lr_0 = 4.8727e-04
Loss = 3.8367e-01, PNorm = 56.6192, GNorm = 1.8506, lr_0 = 4.8693e-04
Loss = 4.1523e-01, PNorm = 56.6286, GNorm = 1.1032, lr_0 = 4.8660e-04
Loss = 3.5900e-01, PNorm = 56.6427, GNorm = 1.1360, lr_0 = 4.8627e-04
Loss = 4.3207e-01, PNorm = 56.6518, GNorm = 1.5998, lr_0 = 4.8593e-04
Loss = 4.1411e-01, PNorm = 56.6586, GNorm = 1.7971, lr_0 = 4.8560e-04
Loss = 3.5262e-01, PNorm = 56.6645, GNorm = 1.2069, lr_0 = 4.8527e-04
Loss = 3.7917e-01, PNorm = 56.6721, GNorm = 1.6190, lr_0 = 4.8494e-04
Loss = 3.9329e-01, PNorm = 56.6777, GNorm = 0.9909, lr_0 = 4.8460e-04
Loss = 3.9261e-01, PNorm = 56.6849, GNorm = 1.1456, lr_0 = 4.8427e-04
Loss = 4.0524e-01, PNorm = 56.6926, GNorm = 1.1093, lr_0 = 4.8394e-04
Loss = 4.2219e-01, PNorm = 56.7054, GNorm = 1.7456, lr_0 = 4.8361e-04
Loss = 4.2378e-01, PNorm = 56.7183, GNorm = 1.8993, lr_0 = 4.8328e-04
Loss = 4.9345e-01, PNorm = 56.7260, GNorm = 2.4751, lr_0 = 4.8295e-04
Loss = 3.8761e-01, PNorm = 56.7367, GNorm = 1.4312, lr_0 = 4.8262e-04
Loss = 4.2037e-01, PNorm = 56.7508, GNorm = 1.1463, lr_0 = 4.8228e-04
Loss = 4.0275e-01, PNorm = 56.7640, GNorm = 1.6489, lr_0 = 4.8195e-04
Loss = 4.6484e-01, PNorm = 56.7713, GNorm = 1.1442, lr_0 = 4.8162e-04
Loss = 4.4084e-01, PNorm = 56.7841, GNorm = 1.2924, lr_0 = 4.8129e-04
Loss = 4.1574e-01, PNorm = 56.7929, GNorm = 1.0736, lr_0 = 4.8096e-04
Loss = 3.5355e-01, PNorm = 56.8046, GNorm = 1.7140, lr_0 = 4.8064e-04
Loss = 3.5598e-01, PNorm = 56.8113, GNorm = 0.9462, lr_0 = 4.8031e-04
Loss = 4.5574e-01, PNorm = 56.8187, GNorm = 1.1004, lr_0 = 4.7998e-04
Loss = 4.4241e-01, PNorm = 56.8238, GNorm = 1.4071, lr_0 = 4.7965e-04
Loss = 3.9314e-01, PNorm = 56.8274, GNorm = 1.9079, lr_0 = 4.7932e-04
Loss = 4.1515e-01, PNorm = 56.8302, GNorm = 1.0514, lr_0 = 4.7899e-04
Loss = 4.0020e-01, PNorm = 56.8410, GNorm = 1.6403, lr_0 = 4.7866e-04
Loss = 4.0110e-01, PNorm = 56.8445, GNorm = 1.0509, lr_0 = 4.7833e-04
Loss = 3.9792e-01, PNorm = 56.8547, GNorm = 1.6880, lr_0 = 4.7801e-04
Loss = 4.2367e-01, PNorm = 56.8637, GNorm = 1.2631, lr_0 = 4.7768e-04
Loss = 4.5615e-01, PNorm = 56.8730, GNorm = 0.9901, lr_0 = 4.7735e-04
Loss = 4.1001e-01, PNorm = 56.8793, GNorm = 1.5583, lr_0 = 4.7703e-04
Validation mae = 0.115271
Epoch 11
Loss = 4.0398e-01, PNorm = 56.8831, GNorm = 2.4147, lr_0 = 4.7670e-04
Loss = 4.1924e-01, PNorm = 56.8858, GNorm = 1.8211, lr_0 = 4.7637e-04
Loss = 4.2912e-01, PNorm = 56.8996, GNorm = 1.9123, lr_0 = 4.7605e-04
Loss = 4.0577e-01, PNorm = 56.9071, GNorm = 1.2545, lr_0 = 4.7572e-04
Loss = 3.7899e-01, PNorm = 56.9153, GNorm = 1.0258, lr_0 = 4.7539e-04
Loss = 4.0232e-01, PNorm = 56.9253, GNorm = 1.3105, lr_0 = 4.7507e-04
Loss = 4.2505e-01, PNorm = 56.9279, GNorm = 1.2191, lr_0 = 4.7474e-04
Loss = 4.1098e-01, PNorm = 56.9360, GNorm = 1.4611, lr_0 = 4.7442e-04
Loss = 4.2277e-01, PNorm = 56.9473, GNorm = 1.6377, lr_0 = 4.7409e-04
Loss = 4.7717e-01, PNorm = 56.9620, GNorm = 1.5643, lr_0 = 4.7377e-04
Loss = 3.8107e-01, PNorm = 56.9716, GNorm = 1.2416, lr_0 = 4.7344e-04
Loss = 4.4534e-01, PNorm = 56.9849, GNorm = 1.2483, lr_0 = 4.7312e-04
Loss = 3.9959e-01, PNorm = 56.9923, GNorm = 1.0443, lr_0 = 4.7279e-04
Loss = 4.2450e-01, PNorm = 56.9985, GNorm = 1.4012, lr_0 = 4.7247e-04
Loss = 3.8779e-01, PNorm = 57.0018, GNorm = 0.9969, lr_0 = 4.7215e-04
Loss = 4.9034e-01, PNorm = 57.0063, GNorm = 1.7226, lr_0 = 4.7182e-04
Loss = 4.5432e-01, PNorm = 57.0159, GNorm = 1.6708, lr_0 = 4.7150e-04
Loss = 4.0376e-01, PNorm = 57.0235, GNorm = 1.3897, lr_0 = 4.7118e-04
Loss = 4.4265e-01, PNorm = 57.0328, GNorm = 1.4991, lr_0 = 4.7085e-04
Loss = 4.1527e-01, PNorm = 57.0417, GNorm = 1.8246, lr_0 = 4.7053e-04
Loss = 3.1860e-01, PNorm = 57.0475, GNorm = 1.5223, lr_0 = 4.7021e-04
Loss = 3.9311e-01, PNorm = 57.0510, GNorm = 1.3277, lr_0 = 4.6989e-04
Loss = 3.6451e-01, PNorm = 57.0579, GNorm = 1.1055, lr_0 = 4.6957e-04
Loss = 3.8098e-01, PNorm = 57.0648, GNorm = 1.1816, lr_0 = 4.6924e-04
Loss = 4.0271e-01, PNorm = 57.0741, GNorm = 1.0858, lr_0 = 4.6892e-04
Loss = 4.0912e-01, PNorm = 57.0842, GNorm = 1.4050, lr_0 = 4.6860e-04
Loss = 4.5119e-01, PNorm = 57.1002, GNorm = 1.3576, lr_0 = 4.6828e-04
Loss = 4.1538e-01, PNorm = 57.1076, GNorm = 1.1674, lr_0 = 4.6796e-04
Loss = 3.5710e-01, PNorm = 57.1171, GNorm = 1.1174, lr_0 = 4.6764e-04
Loss = 4.4236e-01, PNorm = 57.1211, GNorm = 2.3258, lr_0 = 4.6732e-04
Loss = 4.3568e-01, PNorm = 57.1290, GNorm = 2.8342, lr_0 = 4.6700e-04
Loss = 3.7464e-01, PNorm = 57.1329, GNorm = 1.1134, lr_0 = 4.6668e-04
Loss = 3.8836e-01, PNorm = 57.1413, GNorm = 1.0321, lr_0 = 4.6636e-04
Loss = 4.2446e-01, PNorm = 57.1495, GNorm = 1.4306, lr_0 = 4.6604e-04
Loss = 3.7613e-01, PNorm = 57.1600, GNorm = 1.3883, lr_0 = 4.6572e-04
Loss = 3.5708e-01, PNorm = 57.1674, GNorm = 1.3123, lr_0 = 4.6540e-04
Loss = 4.3403e-01, PNorm = 57.1770, GNorm = 1.5714, lr_0 = 4.6508e-04
Loss = 4.5159e-01, PNorm = 57.1842, GNorm = 1.5580, lr_0 = 4.6476e-04
Loss = 4.3063e-01, PNorm = 57.1972, GNorm = 1.9942, lr_0 = 4.6445e-04
Loss = 4.1470e-01, PNorm = 57.2098, GNorm = 2.0397, lr_0 = 4.6413e-04
Loss = 4.6682e-01, PNorm = 57.2217, GNorm = 1.7543, lr_0 = 4.6381e-04
Loss = 4.7870e-01, PNorm = 57.2339, GNorm = 1.2937, lr_0 = 4.6349e-04
Loss = 3.8349e-01, PNorm = 57.2417, GNorm = 1.2832, lr_0 = 4.6317e-04
Loss = 4.3136e-01, PNorm = 57.2500, GNorm = 1.6076, lr_0 = 4.6286e-04
Loss = 4.3006e-01, PNorm = 57.2554, GNorm = 2.0288, lr_0 = 4.6254e-04
Loss = 4.4288e-01, PNorm = 57.2645, GNorm = 1.8161, lr_0 = 4.6222e-04
Loss = 4.3422e-01, PNorm = 57.2723, GNorm = 1.7123, lr_0 = 4.6191e-04
Loss = 3.7655e-01, PNorm = 57.2826, GNorm = 1.5523, lr_0 = 4.6159e-04
Loss = 4.4004e-01, PNorm = 57.2939, GNorm = 1.8592, lr_0 = 4.6127e-04
Loss = 4.6101e-01, PNorm = 57.3017, GNorm = 1.3763, lr_0 = 4.6096e-04
Loss = 3.9671e-01, PNorm = 57.3095, GNorm = 1.5131, lr_0 = 4.6064e-04
Loss = 3.9108e-01, PNorm = 57.3160, GNorm = 1.3818, lr_0 = 4.6033e-04
Loss = 4.1543e-01, PNorm = 57.3215, GNorm = 1.3453, lr_0 = 4.6001e-04
Loss = 4.2475e-01, PNorm = 57.3311, GNorm = 1.3363, lr_0 = 4.5970e-04
Loss = 3.5322e-01, PNorm = 57.3385, GNorm = 1.1481, lr_0 = 4.5938e-04
Loss = 4.0536e-01, PNorm = 57.3459, GNorm = 1.4347, lr_0 = 4.5907e-04
Loss = 3.8829e-01, PNorm = 57.3534, GNorm = 1.6724, lr_0 = 4.5875e-04
Loss = 3.5707e-01, PNorm = 57.3564, GNorm = 1.5189, lr_0 = 4.5844e-04
Loss = 4.1490e-01, PNorm = 57.3652, GNorm = 1.6190, lr_0 = 4.5812e-04
Loss = 3.8857e-01, PNorm = 57.3690, GNorm = 1.5607, lr_0 = 4.5781e-04
Loss = 4.0022e-01, PNorm = 57.3716, GNorm = 1.0084, lr_0 = 4.5750e-04
Loss = 4.1122e-01, PNorm = 57.3802, GNorm = 2.3338, lr_0 = 4.5718e-04
Loss = 4.2235e-01, PNorm = 57.3860, GNorm = 1.2644, lr_0 = 4.5687e-04
Loss = 4.1952e-01, PNorm = 57.3908, GNorm = 2.9121, lr_0 = 4.5656e-04
Loss = 4.8757e-01, PNorm = 57.4000, GNorm = 1.8879, lr_0 = 4.5624e-04
Loss = 4.1017e-01, PNorm = 57.4098, GNorm = 1.6561, lr_0 = 4.5593e-04
Loss = 4.0132e-01, PNorm = 57.4181, GNorm = 1.1057, lr_0 = 4.5562e-04
Loss = 4.0270e-01, PNorm = 57.4224, GNorm = 1.0024, lr_0 = 4.5531e-04
Loss = 3.8872e-01, PNorm = 57.4260, GNorm = 1.4520, lr_0 = 4.5499e-04
Loss = 3.9560e-01, PNorm = 57.4295, GNorm = 1.4356, lr_0 = 4.5468e-04
Loss = 3.7217e-01, PNorm = 57.4366, GNorm = 1.4550, lr_0 = 4.5437e-04
Loss = 3.8053e-01, PNorm = 57.4448, GNorm = 1.5801, lr_0 = 4.5406e-04
Loss = 3.5883e-01, PNorm = 57.4549, GNorm = 1.1581, lr_0 = 4.5375e-04
Loss = 4.3370e-01, PNorm = 57.4594, GNorm = 1.5941, lr_0 = 4.5344e-04
Loss = 3.5330e-01, PNorm = 57.4676, GNorm = 1.4624, lr_0 = 4.5313e-04
Loss = 4.1164e-01, PNorm = 57.4742, GNorm = 1.5725, lr_0 = 4.5282e-04
Loss = 3.8645e-01, PNorm = 57.4810, GNorm = 1.3322, lr_0 = 4.5251e-04
Loss = 4.9777e-01, PNorm = 57.4950, GNorm = 1.4547, lr_0 = 4.5220e-04
Loss = 3.6558e-01, PNorm = 57.5015, GNorm = 2.0826, lr_0 = 4.5189e-04
Loss = 4.7431e-01, PNorm = 57.5064, GNorm = 2.1766, lr_0 = 4.5158e-04
Loss = 4.8933e-01, PNorm = 57.5166, GNorm = 1.4904, lr_0 = 4.5127e-04
Loss = 4.3458e-01, PNorm = 57.5281, GNorm = 1.4276, lr_0 = 4.5096e-04
Loss = 4.2160e-01, PNorm = 57.5366, GNorm = 1.7439, lr_0 = 4.5065e-04
Loss = 4.1644e-01, PNorm = 57.5425, GNorm = 1.5352, lr_0 = 4.5034e-04
Loss = 3.9783e-01, PNorm = 57.5529, GNorm = 1.3353, lr_0 = 4.5003e-04
Loss = 3.7094e-01, PNorm = 57.5574, GNorm = 1.3536, lr_0 = 4.4972e-04
Loss = 4.4438e-01, PNorm = 57.5676, GNorm = 1.3799, lr_0 = 4.4942e-04
Loss = 3.9476e-01, PNorm = 57.5762, GNorm = 1.8032, lr_0 = 4.4911e-04
Loss = 4.4135e-01, PNorm = 57.5806, GNorm = 1.2356, lr_0 = 4.4880e-04
Loss = 4.1437e-01, PNorm = 57.5926, GNorm = 1.1711, lr_0 = 4.4849e-04
Loss = 3.3992e-01, PNorm = 57.5975, GNorm = 1.3493, lr_0 = 4.4819e-04
Loss = 3.2499e-01, PNorm = 57.6033, GNorm = 1.3636, lr_0 = 4.4788e-04
Loss = 3.7645e-01, PNorm = 57.6124, GNorm = 1.4195, lr_0 = 4.4757e-04
Loss = 3.8124e-01, PNorm = 57.6213, GNorm = 1.2126, lr_0 = 4.4727e-04
Loss = 4.2091e-01, PNorm = 57.6286, GNorm = 1.4524, lr_0 = 4.4696e-04
Loss = 4.5563e-01, PNorm = 57.6314, GNorm = 1.1054, lr_0 = 4.4665e-04
Loss = 4.6242e-01, PNorm = 57.6418, GNorm = 1.6934, lr_0 = 4.4635e-04
Loss = 4.4014e-01, PNorm = 57.6563, GNorm = 1.9438, lr_0 = 4.4604e-04
Loss = 4.0677e-01, PNorm = 57.6684, GNorm = 1.3335, lr_0 = 4.4574e-04
Loss = 4.1471e-01, PNorm = 57.6810, GNorm = 1.0815, lr_0 = 4.4543e-04
Loss = 4.4248e-01, PNorm = 57.6887, GNorm = 1.5536, lr_0 = 4.4513e-04
Loss = 3.7045e-01, PNorm = 57.6958, GNorm = 1.7872, lr_0 = 4.4482e-04
Loss = 4.2068e-01, PNorm = 57.7014, GNorm = 1.6983, lr_0 = 4.4452e-04
Loss = 4.0213e-01, PNorm = 57.7084, GNorm = 1.2979, lr_0 = 4.4421e-04
Loss = 3.9654e-01, PNorm = 57.7160, GNorm = 2.1063, lr_0 = 4.4391e-04
Loss = 3.6621e-01, PNorm = 57.7252, GNorm = 1.3070, lr_0 = 4.4360e-04
Loss = 4.0936e-01, PNorm = 57.7345, GNorm = 1.4014, lr_0 = 4.4330e-04
Loss = 3.6750e-01, PNorm = 57.7378, GNorm = 1.4731, lr_0 = 4.4299e-04
Loss = 3.4414e-01, PNorm = 57.7484, GNorm = 1.9504, lr_0 = 4.4269e-04
Loss = 4.2754e-01, PNorm = 57.7550, GNorm = 1.3955, lr_0 = 4.4239e-04
Loss = 3.5002e-01, PNorm = 57.7645, GNorm = 1.3700, lr_0 = 4.4209e-04
Loss = 4.3224e-01, PNorm = 57.7659, GNorm = 1.8230, lr_0 = 4.4178e-04
Loss = 4.4722e-01, PNorm = 57.7755, GNorm = 1.0516, lr_0 = 4.4148e-04
Loss = 3.8749e-01, PNorm = 57.7852, GNorm = 1.4542, lr_0 = 4.4118e-04
Loss = 4.3015e-01, PNorm = 57.7937, GNorm = 1.3537, lr_0 = 4.4088e-04
Loss = 4.1573e-01, PNorm = 57.7964, GNorm = 1.4232, lr_0 = 4.4057e-04
Loss = 3.8012e-01, PNorm = 57.8025, GNorm = 1.6743, lr_0 = 4.4027e-04
Loss = 3.8461e-01, PNorm = 57.8084, GNorm = 1.1493, lr_0 = 4.3997e-04
Loss = 4.1907e-01, PNorm = 57.8123, GNorm = 1.2932, lr_0 = 4.3967e-04
Loss = 3.9159e-01, PNorm = 57.8193, GNorm = 1.5389, lr_0 = 4.3937e-04
Validation mae = 0.113904
Epoch 12
Loss = 4.1904e-01, PNorm = 57.8276, GNorm = 1.0265, lr_0 = 4.3907e-04
Loss = 4.3757e-01, PNorm = 57.8375, GNorm = 1.5752, lr_0 = 4.3877e-04
Loss = 3.5188e-01, PNorm = 57.8426, GNorm = 0.9592, lr_0 = 4.3846e-04
Loss = 4.4833e-01, PNorm = 57.8527, GNorm = 1.2367, lr_0 = 4.3816e-04
Loss = 4.2902e-01, PNorm = 57.8558, GNorm = 1.4233, lr_0 = 4.3786e-04
Loss = 3.8576e-01, PNorm = 57.8632, GNorm = 1.4051, lr_0 = 4.3756e-04
Loss = 4.4303e-01, PNorm = 57.8670, GNorm = 1.3240, lr_0 = 4.3726e-04
Loss = 3.7333e-01, PNorm = 57.8762, GNorm = 1.3416, lr_0 = 4.3696e-04
Loss = 4.1077e-01, PNorm = 57.8797, GNorm = 1.3013, lr_0 = 4.3667e-04
Loss = 4.3682e-01, PNorm = 57.8859, GNorm = 1.6090, lr_0 = 4.3637e-04
Loss = 4.2553e-01, PNorm = 57.8948, GNorm = 1.0520, lr_0 = 4.3607e-04
Loss = 3.9411e-01, PNorm = 57.9064, GNorm = 1.1377, lr_0 = 4.3577e-04
Loss = 3.7477e-01, PNorm = 57.9157, GNorm = 1.4418, lr_0 = 4.3547e-04
Loss = 3.8427e-01, PNorm = 57.9194, GNorm = 1.6251, lr_0 = 4.3517e-04
Loss = 3.8759e-01, PNorm = 57.9245, GNorm = 1.9772, lr_0 = 4.3487e-04
Loss = 3.3157e-01, PNorm = 57.9292, GNorm = 0.8202, lr_0 = 4.3458e-04
Loss = 5.0011e-01, PNorm = 57.9364, GNorm = 1.7380, lr_0 = 4.3428e-04
Loss = 4.1289e-01, PNorm = 57.9433, GNorm = 1.4235, lr_0 = 4.3398e-04
Loss = 3.9703e-01, PNorm = 57.9489, GNorm = 1.1258, lr_0 = 4.3368e-04
Loss = 4.0204e-01, PNorm = 57.9524, GNorm = 1.9509, lr_0 = 4.3339e-04
Loss = 3.7649e-01, PNorm = 57.9604, GNorm = 1.2893, lr_0 = 4.3309e-04
Loss = 3.8441e-01, PNorm = 57.9677, GNorm = 1.1420, lr_0 = 4.3279e-04
Loss = 4.0290e-01, PNorm = 57.9721, GNorm = 1.5716, lr_0 = 4.3250e-04
Loss = 3.8943e-01, PNorm = 57.9837, GNorm = 1.3268, lr_0 = 4.3220e-04
Loss = 4.1767e-01, PNorm = 57.9978, GNorm = 1.4321, lr_0 = 4.3190e-04
Loss = 3.6387e-01, PNorm = 58.0035, GNorm = 1.4331, lr_0 = 4.3161e-04
Loss = 3.8537e-01, PNorm = 58.0034, GNorm = 1.4462, lr_0 = 4.3131e-04
Loss = 4.3642e-01, PNorm = 58.0101, GNorm = 1.0539, lr_0 = 4.3102e-04
Loss = 4.3122e-01, PNorm = 58.0178, GNorm = 2.5094, lr_0 = 4.3072e-04
Loss = 3.8970e-01, PNorm = 58.0270, GNorm = 0.8280, lr_0 = 4.3043e-04
Loss = 4.1389e-01, PNorm = 58.0326, GNorm = 1.7898, lr_0 = 4.3013e-04
Loss = 3.7080e-01, PNorm = 58.0443, GNorm = 1.0308, lr_0 = 4.2984e-04
Loss = 3.7321e-01, PNorm = 58.0508, GNorm = 1.1315, lr_0 = 4.2954e-04
Loss = 3.4961e-01, PNorm = 58.0597, GNorm = 1.2208, lr_0 = 4.2925e-04
Loss = 3.7564e-01, PNorm = 58.0621, GNorm = 1.3276, lr_0 = 4.2895e-04
Loss = 4.1299e-01, PNorm = 58.0718, GNorm = 1.7602, lr_0 = 4.2866e-04
Loss = 4.4056e-01, PNorm = 58.0824, GNorm = 1.4900, lr_0 = 4.2837e-04
Loss = 3.9476e-01, PNorm = 58.0844, GNorm = 1.8443, lr_0 = 4.2807e-04
Loss = 4.0098e-01, PNorm = 58.0939, GNorm = 1.3064, lr_0 = 4.2778e-04
Loss = 4.1142e-01, PNorm = 58.1022, GNorm = 1.7177, lr_0 = 4.2749e-04
Loss = 3.5398e-01, PNorm = 58.1182, GNorm = 1.2922, lr_0 = 4.2719e-04
Loss = 3.9981e-01, PNorm = 58.1264, GNorm = 1.8164, lr_0 = 4.2690e-04
Loss = 3.9678e-01, PNorm = 58.1327, GNorm = 0.8609, lr_0 = 4.2661e-04
Loss = 3.8365e-01, PNorm = 58.1368, GNorm = 1.1062, lr_0 = 4.2632e-04
Loss = 4.2742e-01, PNorm = 58.1407, GNorm = 1.9979, lr_0 = 4.2602e-04
Loss = 4.0401e-01, PNorm = 58.1473, GNorm = 1.1748, lr_0 = 4.2573e-04
Loss = 3.2462e-01, PNorm = 58.1583, GNorm = 1.2842, lr_0 = 4.2544e-04
Loss = 4.0072e-01, PNorm = 58.1648, GNorm = 2.0092, lr_0 = 4.2515e-04
Loss = 4.2236e-01, PNorm = 58.1707, GNorm = 1.4227, lr_0 = 4.2486e-04
Loss = 3.7302e-01, PNorm = 58.1776, GNorm = 1.1872, lr_0 = 4.2457e-04
Loss = 4.2312e-01, PNorm = 58.1797, GNorm = 0.9511, lr_0 = 4.2428e-04
Loss = 4.3456e-01, PNorm = 58.1856, GNorm = 1.8404, lr_0 = 4.2399e-04
Loss = 4.4250e-01, PNorm = 58.1949, GNorm = 2.6001, lr_0 = 4.2370e-04
Loss = 4.0628e-01, PNorm = 58.1971, GNorm = 1.4107, lr_0 = 4.2340e-04
Loss = 4.1194e-01, PNorm = 58.2015, GNorm = 1.1130, lr_0 = 4.2311e-04
Loss = 3.7405e-01, PNorm = 58.2095, GNorm = 1.1781, lr_0 = 4.2283e-04
Loss = 3.9719e-01, PNorm = 58.2177, GNorm = 1.4035, lr_0 = 4.2254e-04
Loss = 3.6848e-01, PNorm = 58.2241, GNorm = 1.2463, lr_0 = 4.2225e-04
Loss = 3.9731e-01, PNorm = 58.2262, GNorm = 1.1587, lr_0 = 4.2196e-04
Loss = 3.9987e-01, PNorm = 58.2341, GNorm = 1.4526, lr_0 = 4.2167e-04
Loss = 4.0592e-01, PNorm = 58.2395, GNorm = 1.0636, lr_0 = 4.2138e-04
Loss = 4.0723e-01, PNorm = 58.2462, GNorm = 1.8891, lr_0 = 4.2109e-04
Loss = 4.0196e-01, PNorm = 58.2565, GNorm = 1.2747, lr_0 = 4.2080e-04
Loss = 3.6435e-01, PNorm = 58.2659, GNorm = 1.2768, lr_0 = 4.2051e-04
Loss = 4.1971e-01, PNorm = 58.2677, GNorm = 1.0092, lr_0 = 4.2023e-04
Loss = 4.5274e-01, PNorm = 58.2723, GNorm = 1.5711, lr_0 = 4.1994e-04
Loss = 4.2313e-01, PNorm = 58.2804, GNorm = 1.2544, lr_0 = 4.1965e-04
Loss = 4.3224e-01, PNorm = 58.2870, GNorm = 1.4148, lr_0 = 4.1936e-04
Loss = 3.9427e-01, PNorm = 58.2937, GNorm = 1.5027, lr_0 = 4.1907e-04
Loss = 3.9412e-01, PNorm = 58.3017, GNorm = 1.9065, lr_0 = 4.1879e-04
Loss = 4.3120e-01, PNorm = 58.3119, GNorm = 1.6763, lr_0 = 4.1850e-04
Loss = 3.5988e-01, PNorm = 58.3170, GNorm = 1.0583, lr_0 = 4.1821e-04
Loss = 3.8743e-01, PNorm = 58.3217, GNorm = 1.5152, lr_0 = 4.1793e-04
Loss = 4.0705e-01, PNorm = 58.3279, GNorm = 1.9028, lr_0 = 4.1764e-04
Loss = 3.5535e-01, PNorm = 58.3285, GNorm = 1.3646, lr_0 = 4.1736e-04
Loss = 3.7948e-01, PNorm = 58.3318, GNorm = 1.2280, lr_0 = 4.1707e-04
Loss = 3.7873e-01, PNorm = 58.3342, GNorm = 1.5366, lr_0 = 4.1678e-04
Loss = 4.4361e-01, PNorm = 58.3398, GNorm = 1.5242, lr_0 = 4.1650e-04
Loss = 4.7395e-01, PNorm = 58.3469, GNorm = 1.2723, lr_0 = 4.1621e-04
Loss = 3.6134e-01, PNorm = 58.3550, GNorm = 1.6078, lr_0 = 4.1593e-04
Loss = 3.9229e-01, PNorm = 58.3596, GNorm = 1.3430, lr_0 = 4.1564e-04
Loss = 3.3322e-01, PNorm = 58.3696, GNorm = 1.9490, lr_0 = 4.1536e-04
Loss = 4.0591e-01, PNorm = 58.3771, GNorm = 1.7264, lr_0 = 4.1507e-04
Loss = 3.5952e-01, PNorm = 58.3852, GNorm = 1.4560, lr_0 = 4.1479e-04
Loss = 4.1278e-01, PNorm = 58.3938, GNorm = 1.3321, lr_0 = 4.1450e-04
Loss = 4.3883e-01, PNorm = 58.3999, GNorm = 1.4135, lr_0 = 4.1422e-04
Loss = 3.4573e-01, PNorm = 58.4005, GNorm = 1.4685, lr_0 = 4.1394e-04
Loss = 3.2560e-01, PNorm = 58.4053, GNorm = 1.3312, lr_0 = 4.1365e-04
Loss = 3.9705e-01, PNorm = 58.4115, GNorm = 1.3790, lr_0 = 4.1337e-04
Loss = 3.5203e-01, PNorm = 58.4178, GNorm = 1.5492, lr_0 = 4.1309e-04
Loss = 3.9297e-01, PNorm = 58.4276, GNorm = 1.2177, lr_0 = 4.1280e-04
Loss = 4.5725e-01, PNorm = 58.4294, GNorm = 1.5165, lr_0 = 4.1252e-04
Loss = 4.2581e-01, PNorm = 58.4337, GNorm = 1.5754, lr_0 = 4.1224e-04
Loss = 3.7873e-01, PNorm = 58.4415, GNorm = 2.0907, lr_0 = 4.1196e-04
Loss = 4.2915e-01, PNorm = 58.4465, GNorm = 1.0657, lr_0 = 4.1167e-04
Loss = 4.4083e-01, PNorm = 58.4546, GNorm = 1.6881, lr_0 = 4.1139e-04
Loss = 4.0408e-01, PNorm = 58.4622, GNorm = 1.6706, lr_0 = 4.1111e-04
Loss = 3.9957e-01, PNorm = 58.4669, GNorm = 1.4308, lr_0 = 4.1083e-04
Loss = 4.0393e-01, PNorm = 58.4725, GNorm = 1.3524, lr_0 = 4.1055e-04
Loss = 4.1255e-01, PNorm = 58.4792, GNorm = 1.2211, lr_0 = 4.1027e-04
Loss = 4.5405e-01, PNorm = 58.4840, GNorm = 1.2362, lr_0 = 4.0998e-04
Loss = 3.5203e-01, PNorm = 58.4863, GNorm = 1.3274, lr_0 = 4.0970e-04
Loss = 3.5789e-01, PNorm = 58.4963, GNorm = 1.9129, lr_0 = 4.0942e-04
Loss = 4.4114e-01, PNorm = 58.5006, GNorm = 2.0611, lr_0 = 4.0914e-04
Loss = 3.9521e-01, PNorm = 58.5046, GNorm = 1.0868, lr_0 = 4.0886e-04
Loss = 3.9200e-01, PNorm = 58.5098, GNorm = 1.1693, lr_0 = 4.0858e-04
Loss = 3.8446e-01, PNorm = 58.5156, GNorm = 1.7799, lr_0 = 4.0830e-04
Loss = 4.0846e-01, PNorm = 58.5235, GNorm = 1.6946, lr_0 = 4.0802e-04
Loss = 4.5450e-01, PNorm = 58.5308, GNorm = 1.6422, lr_0 = 4.0774e-04
Loss = 4.1543e-01, PNorm = 58.5414, GNorm = 1.5069, lr_0 = 4.0746e-04
Loss = 3.7960e-01, PNorm = 58.5500, GNorm = 1.1786, lr_0 = 4.0718e-04
Loss = 4.0174e-01, PNorm = 58.5579, GNorm = 1.1560, lr_0 = 4.0691e-04
Loss = 3.6341e-01, PNorm = 58.5663, GNorm = 1.5447, lr_0 = 4.0663e-04
Loss = 4.2039e-01, PNorm = 58.5735, GNorm = 1.3271, lr_0 = 4.0635e-04
Loss = 4.3467e-01, PNorm = 58.5815, GNorm = 1.3530, lr_0 = 4.0607e-04
Loss = 4.0125e-01, PNorm = 58.5855, GNorm = 1.5438, lr_0 = 4.0579e-04
Loss = 4.6295e-01, PNorm = 58.5885, GNorm = 1.2340, lr_0 = 4.0551e-04
Loss = 4.3737e-01, PNorm = 58.5928, GNorm = 1.2717, lr_0 = 4.0524e-04
Loss = 3.7860e-01, PNorm = 58.6026, GNorm = 1.2145, lr_0 = 4.0496e-04
Loss = 4.1715e-01, PNorm = 58.6048, GNorm = 1.0540, lr_0 = 4.0468e-04
Validation mae = 0.114351
Epoch 13
Loss = 3.8677e-01, PNorm = 58.6126, GNorm = 1.0343, lr_0 = 4.0440e-04
Loss = 3.7517e-01, PNorm = 58.6159, GNorm = 0.8652, lr_0 = 4.0413e-04
Loss = 3.9594e-01, PNorm = 58.6217, GNorm = 1.4234, lr_0 = 4.0385e-04
Loss = 4.1881e-01, PNorm = 58.6319, GNorm = 1.3682, lr_0 = 4.0357e-04
Loss = 3.9542e-01, PNorm = 58.6375, GNorm = 1.3974, lr_0 = 4.0330e-04
Loss = 3.9169e-01, PNorm = 58.6429, GNorm = 1.0574, lr_0 = 4.0302e-04
Loss = 4.0166e-01, PNorm = 58.6442, GNorm = 1.3579, lr_0 = 4.0274e-04
Loss = 3.4847e-01, PNorm = 58.6535, GNorm = 1.0221, lr_0 = 4.0247e-04
Loss = 4.0602e-01, PNorm = 58.6626, GNorm = 1.2610, lr_0 = 4.0219e-04
Loss = 3.7266e-01, PNorm = 58.6691, GNorm = 1.8594, lr_0 = 4.0192e-04
Loss = 3.9075e-01, PNorm = 58.6700, GNorm = 1.7376, lr_0 = 4.0164e-04
Loss = 3.8376e-01, PNorm = 58.6838, GNorm = 1.2726, lr_0 = 4.0137e-04
Loss = 4.4198e-01, PNorm = 58.6897, GNorm = 2.2985, lr_0 = 4.0109e-04
Loss = 4.1698e-01, PNorm = 58.6961, GNorm = 1.0639, lr_0 = 4.0082e-04
Loss = 3.9104e-01, PNorm = 58.7002, GNorm = 1.3531, lr_0 = 4.0054e-04
Loss = 4.1802e-01, PNorm = 58.7076, GNorm = 1.0792, lr_0 = 4.0027e-04
Loss = 3.8453e-01, PNorm = 58.7153, GNorm = 0.9978, lr_0 = 3.9999e-04
Loss = 3.8567e-01, PNorm = 58.7163, GNorm = 1.8579, lr_0 = 3.9972e-04
Loss = 3.8504e-01, PNorm = 58.7157, GNorm = 0.9494, lr_0 = 3.9945e-04
Loss = 4.7179e-01, PNorm = 58.7296, GNorm = 1.2325, lr_0 = 3.9917e-04
Loss = 3.6469e-01, PNorm = 58.7429, GNorm = 1.0928, lr_0 = 3.9890e-04
Loss = 3.4212e-01, PNorm = 58.7513, GNorm = 1.3258, lr_0 = 3.9863e-04
Loss = 3.8047e-01, PNorm = 58.7609, GNorm = 1.2740, lr_0 = 3.9835e-04
Loss = 4.7465e-01, PNorm = 58.7681, GNorm = 1.3123, lr_0 = 3.9808e-04
Loss = 3.6960e-01, PNorm = 58.7718, GNorm = 1.5896, lr_0 = 3.9781e-04
Loss = 3.7556e-01, PNorm = 58.7796, GNorm = 1.9155, lr_0 = 3.9753e-04
Loss = 3.5308e-01, PNorm = 58.7851, GNorm = 1.6310, lr_0 = 3.9726e-04
Loss = 3.7218e-01, PNorm = 58.7968, GNorm = 1.9695, lr_0 = 3.9699e-04
Loss = 3.8859e-01, PNorm = 58.7990, GNorm = 1.1092, lr_0 = 3.9672e-04
Loss = 4.1173e-01, PNorm = 58.8042, GNorm = 1.1318, lr_0 = 3.9645e-04
Loss = 4.0852e-01, PNorm = 58.8119, GNorm = 2.5264, lr_0 = 3.9617e-04
Loss = 3.7419e-01, PNorm = 58.8213, GNorm = 1.1986, lr_0 = 3.9590e-04
Loss = 4.2987e-01, PNorm = 58.8315, GNorm = 1.7310, lr_0 = 3.9563e-04
Loss = 4.0241e-01, PNorm = 58.8321, GNorm = 1.2730, lr_0 = 3.9536e-04
Loss = 4.2134e-01, PNorm = 58.8376, GNorm = 1.1834, lr_0 = 3.9509e-04
Loss = 4.4469e-01, PNorm = 58.8503, GNorm = 1.2381, lr_0 = 3.9482e-04
Loss = 4.2163e-01, PNorm = 58.8568, GNorm = 0.9643, lr_0 = 3.9455e-04
Loss = 4.1361e-01, PNorm = 58.8667, GNorm = 1.0166, lr_0 = 3.9428e-04
Loss = 3.4698e-01, PNorm = 58.8714, GNorm = 1.1354, lr_0 = 3.9401e-04
Loss = 3.8010e-01, PNorm = 58.8794, GNorm = 1.2986, lr_0 = 3.9374e-04
Loss = 4.4423e-01, PNorm = 58.8865, GNorm = 2.1658, lr_0 = 3.9347e-04
Loss = 3.9872e-01, PNorm = 58.8913, GNorm = 1.0377, lr_0 = 3.9320e-04
Loss = 4.0159e-01, PNorm = 58.8882, GNorm = 1.0211, lr_0 = 3.9293e-04
Loss = 3.8854e-01, PNorm = 58.8888, GNorm = 1.0180, lr_0 = 3.9266e-04
Loss = 3.6229e-01, PNorm = 58.8982, GNorm = 1.5060, lr_0 = 3.9239e-04
Loss = 3.6919e-01, PNorm = 58.9032, GNorm = 1.1540, lr_0 = 3.9212e-04
Loss = 4.4238e-01, PNorm = 58.9118, GNorm = 1.4308, lr_0 = 3.9185e-04
Loss = 4.2487e-01, PNorm = 58.9164, GNorm = 1.7857, lr_0 = 3.9159e-04
Loss = 4.1838e-01, PNorm = 58.9252, GNorm = 1.1095, lr_0 = 3.9132e-04
Loss = 4.6050e-01, PNorm = 58.9310, GNorm = 1.5397, lr_0 = 3.9105e-04
Loss = 4.4736e-01, PNorm = 58.9388, GNorm = 3.0084, lr_0 = 3.9078e-04
Loss = 4.5488e-01, PNorm = 58.9452, GNorm = 1.2577, lr_0 = 3.9051e-04
Loss = 4.4030e-01, PNorm = 58.9512, GNorm = 1.5176, lr_0 = 3.9025e-04
Loss = 3.6125e-01, PNorm = 58.9533, GNorm = 1.5142, lr_0 = 3.8998e-04
Loss = 3.9536e-01, PNorm = 58.9578, GNorm = 1.9036, lr_0 = 3.8971e-04
Loss = 3.6535e-01, PNorm = 58.9583, GNorm = 1.5496, lr_0 = 3.8945e-04
Loss = 3.6540e-01, PNorm = 58.9634, GNorm = 1.4769, lr_0 = 3.8918e-04
Loss = 3.7177e-01, PNorm = 58.9654, GNorm = 0.8223, lr_0 = 3.8891e-04
Loss = 3.8011e-01, PNorm = 58.9698, GNorm = 0.9684, lr_0 = 3.8865e-04
Loss = 3.8929e-01, PNorm = 58.9726, GNorm = 1.5121, lr_0 = 3.8838e-04
Loss = 3.9841e-01, PNorm = 58.9756, GNorm = 1.4432, lr_0 = 3.8811e-04
Loss = 3.5789e-01, PNorm = 58.9819, GNorm = 1.1917, lr_0 = 3.8785e-04
Loss = 3.8348e-01, PNorm = 58.9871, GNorm = 1.3139, lr_0 = 3.8758e-04
Loss = 3.8686e-01, PNorm = 58.9999, GNorm = 1.7608, lr_0 = 3.8732e-04
Loss = 4.2232e-01, PNorm = 59.0008, GNorm = 1.7681, lr_0 = 3.8705e-04
Loss = 3.6706e-01, PNorm = 59.0102, GNorm = 1.2562, lr_0 = 3.8679e-04
Loss = 3.6355e-01, PNorm = 59.0202, GNorm = 1.0961, lr_0 = 3.8652e-04
Loss = 4.1753e-01, PNorm = 59.0279, GNorm = 2.1761, lr_0 = 3.8626e-04
Loss = 3.7719e-01, PNorm = 59.0277, GNorm = 0.9437, lr_0 = 3.8599e-04
Loss = 3.7781e-01, PNorm = 59.0296, GNorm = 0.9790, lr_0 = 3.8573e-04
Loss = 4.6216e-01, PNorm = 59.0318, GNorm = 1.9982, lr_0 = 3.8546e-04
Loss = 4.1587e-01, PNorm = 59.0404, GNorm = 1.3433, lr_0 = 3.8520e-04
Loss = 3.9563e-01, PNorm = 59.0424, GNorm = 1.4299, lr_0 = 3.8493e-04
Loss = 4.0479e-01, PNorm = 59.0462, GNorm = 1.7527, lr_0 = 3.8467e-04
Loss = 3.7252e-01, PNorm = 59.0485, GNorm = 1.1184, lr_0 = 3.8441e-04
Loss = 3.9771e-01, PNorm = 59.0559, GNorm = 1.5155, lr_0 = 3.8414e-04
Loss = 3.8949e-01, PNorm = 59.0637, GNorm = 1.2130, lr_0 = 3.8388e-04
Loss = 4.2201e-01, PNorm = 59.0684, GNorm = 1.0960, lr_0 = 3.8362e-04
Loss = 3.6900e-01, PNorm = 59.0746, GNorm = 1.5512, lr_0 = 3.8336e-04
Loss = 3.7689e-01, PNorm = 59.0765, GNorm = 0.8325, lr_0 = 3.8309e-04
Loss = 3.9968e-01, PNorm = 59.0785, GNorm = 2.0204, lr_0 = 3.8283e-04
Loss = 3.3948e-01, PNorm = 59.0888, GNorm = 1.0650, lr_0 = 3.8257e-04
Loss = 4.0421e-01, PNorm = 59.0955, GNorm = 1.3573, lr_0 = 3.8231e-04
Loss = 4.1046e-01, PNorm = 59.1046, GNorm = 1.3000, lr_0 = 3.8204e-04
Loss = 4.1941e-01, PNorm = 59.1033, GNorm = 1.3616, lr_0 = 3.8178e-04
Loss = 3.5268e-01, PNorm = 59.1105, GNorm = 1.7318, lr_0 = 3.8152e-04
Loss = 4.0433e-01, PNorm = 59.1126, GNorm = 1.1950, lr_0 = 3.8126e-04
Loss = 4.0595e-01, PNorm = 59.1164, GNorm = 1.4519, lr_0 = 3.8100e-04
Loss = 4.3338e-01, PNorm = 59.1214, GNorm = 1.0799, lr_0 = 3.8074e-04
Loss = 4.1087e-01, PNorm = 59.1257, GNorm = 1.0274, lr_0 = 3.8048e-04
Loss = 4.2222e-01, PNorm = 59.1332, GNorm = 1.7732, lr_0 = 3.8022e-04
Loss = 4.1220e-01, PNorm = 59.1394, GNorm = 2.2332, lr_0 = 3.7995e-04
Loss = 4.0863e-01, PNorm = 59.1495, GNorm = 1.1115, lr_0 = 3.7969e-04
Loss = 4.5972e-01, PNorm = 59.1465, GNorm = 1.3292, lr_0 = 3.7943e-04
Loss = 4.0459e-01, PNorm = 59.1547, GNorm = 1.4461, lr_0 = 3.7917e-04
Loss = 4.0505e-01, PNorm = 59.1609, GNorm = 1.5315, lr_0 = 3.7891e-04
Loss = 4.0163e-01, PNorm = 59.1632, GNorm = 1.1509, lr_0 = 3.7866e-04
Loss = 4.1504e-01, PNorm = 59.1732, GNorm = 1.4048, lr_0 = 3.7840e-04
Loss = 4.3460e-01, PNorm = 59.1742, GNorm = 2.0102, lr_0 = 3.7814e-04
Loss = 3.6407e-01, PNorm = 59.1839, GNorm = 1.0162, lr_0 = 3.7788e-04
Loss = 3.8235e-01, PNorm = 59.1906, GNorm = 1.3169, lr_0 = 3.7762e-04
Loss = 3.9361e-01, PNorm = 59.1948, GNorm = 1.0218, lr_0 = 3.7736e-04
Loss = 3.4906e-01, PNorm = 59.2019, GNorm = 1.6766, lr_0 = 3.7710e-04
Loss = 4.4501e-01, PNorm = 59.2085, GNorm = 0.9359, lr_0 = 3.7684e-04
Loss = 4.2054e-01, PNorm = 59.2138, GNorm = 1.3757, lr_0 = 3.7659e-04
Loss = 3.9932e-01, PNorm = 59.2220, GNorm = 1.5736, lr_0 = 3.7633e-04
Loss = 4.0654e-01, PNorm = 59.2273, GNorm = 1.1694, lr_0 = 3.7607e-04
Loss = 4.0650e-01, PNorm = 59.2281, GNorm = 1.1584, lr_0 = 3.7581e-04
Loss = 3.8714e-01, PNorm = 59.2322, GNorm = 1.1942, lr_0 = 3.7555e-04
Loss = 3.8926e-01, PNorm = 59.2400, GNorm = 1.1432, lr_0 = 3.7530e-04
Loss = 3.7742e-01, PNorm = 59.2412, GNorm = 1.8278, lr_0 = 3.7504e-04
Loss = 4.0353e-01, PNorm = 59.2438, GNorm = 2.2435, lr_0 = 3.7478e-04
Loss = 3.3723e-01, PNorm = 59.2479, GNorm = 1.1880, lr_0 = 3.7453e-04
Loss = 3.9025e-01, PNorm = 59.2522, GNorm = 1.4178, lr_0 = 3.7427e-04
Loss = 3.7230e-01, PNorm = 59.2642, GNorm = 1.2887, lr_0 = 3.7401e-04
Loss = 3.8577e-01, PNorm = 59.2692, GNorm = 1.6272, lr_0 = 3.7376e-04
Loss = 3.9614e-01, PNorm = 59.2741, GNorm = 2.0121, lr_0 = 3.7350e-04
Loss = 3.5219e-01, PNorm = 59.2770, GNorm = 1.6296, lr_0 = 3.7325e-04
Loss = 4.1970e-01, PNorm = 59.2762, GNorm = 0.9729, lr_0 = 3.7299e-04
Loss = 3.7185e-01, PNorm = 59.2766, GNorm = 1.0609, lr_0 = 3.7273e-04
Validation mae = 0.113379
Epoch 14
Loss = 3.4794e-01, PNorm = 59.2821, GNorm = 1.2648, lr_0 = 3.7248e-04
Loss = 3.6330e-01, PNorm = 59.2855, GNorm = 1.7591, lr_0 = 3.7222e-04
Loss = 3.5755e-01, PNorm = 59.2922, GNorm = 1.1338, lr_0 = 3.7197e-04
Loss = 3.7830e-01, PNorm = 59.2943, GNorm = 1.6292, lr_0 = 3.7171e-04
Loss = 4.1006e-01, PNorm = 59.2970, GNorm = 1.4056, lr_0 = 3.7146e-04
Loss = 3.7127e-01, PNorm = 59.3084, GNorm = 1.2579, lr_0 = 3.7120e-04
Loss = 3.7993e-01, PNorm = 59.3181, GNorm = 1.1573, lr_0 = 3.7095e-04
Loss = 4.4768e-01, PNorm = 59.3253, GNorm = 1.3417, lr_0 = 3.7070e-04
Loss = 3.7967e-01, PNorm = 59.3335, GNorm = 1.7761, lr_0 = 3.7044e-04
Loss = 4.0961e-01, PNorm = 59.3424, GNorm = 2.1682, lr_0 = 3.7019e-04
Loss = 3.8553e-01, PNorm = 59.3500, GNorm = 1.2800, lr_0 = 3.6993e-04
Loss = 3.6823e-01, PNorm = 59.3568, GNorm = 1.0736, lr_0 = 3.6968e-04
Loss = 3.8024e-01, PNorm = 59.3628, GNorm = 1.4055, lr_0 = 3.6943e-04
Loss = 3.9554e-01, PNorm = 59.3718, GNorm = 0.9932, lr_0 = 3.6917e-04
Loss = 3.7507e-01, PNorm = 59.3759, GNorm = 1.6959, lr_0 = 3.6892e-04
Loss = 3.9702e-01, PNorm = 59.3787, GNorm = 1.3555, lr_0 = 3.6867e-04
Loss = 3.4042e-01, PNorm = 59.3819, GNorm = 0.8849, lr_0 = 3.6842e-04
Loss = 4.4707e-01, PNorm = 59.3841, GNorm = 1.4131, lr_0 = 3.6816e-04
Loss = 3.7126e-01, PNorm = 59.3887, GNorm = 2.6269, lr_0 = 3.6791e-04
Loss = 3.6277e-01, PNorm = 59.3919, GNorm = 1.1401, lr_0 = 3.6766e-04
Loss = 4.2722e-01, PNorm = 59.3968, GNorm = 1.7755, lr_0 = 3.6741e-04
Loss = 3.8623e-01, PNorm = 59.4032, GNorm = 1.5871, lr_0 = 3.6716e-04
Loss = 3.6889e-01, PNorm = 59.4102, GNorm = 1.5031, lr_0 = 3.6690e-04
Loss = 4.1667e-01, PNorm = 59.4208, GNorm = 1.7288, lr_0 = 3.6665e-04
Loss = 3.5202e-01, PNorm = 59.4250, GNorm = 1.4474, lr_0 = 3.6640e-04
Loss = 4.1654e-01, PNorm = 59.4306, GNorm = 0.9843, lr_0 = 3.6615e-04
Loss = 3.8334e-01, PNorm = 59.4370, GNorm = 1.3189, lr_0 = 3.6590e-04
Loss = 3.7530e-01, PNorm = 59.4411, GNorm = 1.5277, lr_0 = 3.6565e-04
Loss = 4.2742e-01, PNorm = 59.4457, GNorm = 1.6142, lr_0 = 3.6540e-04
Loss = 3.8861e-01, PNorm = 59.4564, GNorm = 1.3131, lr_0 = 3.6515e-04
Loss = 4.4182e-01, PNorm = 59.4565, GNorm = 1.3133, lr_0 = 3.6490e-04
Loss = 3.7544e-01, PNorm = 59.4603, GNorm = 1.7873, lr_0 = 3.6465e-04
Loss = 3.4803e-01, PNorm = 59.4691, GNorm = 1.5377, lr_0 = 3.6440e-04
Loss = 3.6199e-01, PNorm = 59.4764, GNorm = 1.6076, lr_0 = 3.6415e-04
Loss = 3.4344e-01, PNorm = 59.4791, GNorm = 1.2461, lr_0 = 3.6390e-04
Loss = 3.9807e-01, PNorm = 59.4836, GNorm = 1.1370, lr_0 = 3.6365e-04
Loss = 3.7874e-01, PNorm = 59.4902, GNorm = 1.6132, lr_0 = 3.6340e-04
Loss = 4.8909e-01, PNorm = 59.4960, GNorm = 1.4312, lr_0 = 3.6315e-04
Loss = 3.7675e-01, PNorm = 59.5045, GNorm = 1.3383, lr_0 = 3.6290e-04
Loss = 3.6877e-01, PNorm = 59.5119, GNorm = 1.6212, lr_0 = 3.6266e-04
Loss = 3.9419e-01, PNorm = 59.5178, GNorm = 1.4723, lr_0 = 3.6241e-04
Loss = 4.2132e-01, PNorm = 59.5269, GNorm = 1.5823, lr_0 = 3.6216e-04
Loss = 3.4626e-01, PNorm = 59.5340, GNorm = 1.2608, lr_0 = 3.6191e-04
Loss = 3.9940e-01, PNorm = 59.5397, GNorm = 1.1774, lr_0 = 3.6166e-04
Loss = 3.8654e-01, PNorm = 59.5453, GNorm = 1.9812, lr_0 = 3.6141e-04
Loss = 3.7544e-01, PNorm = 59.5471, GNorm = 0.8106, lr_0 = 3.6117e-04
Loss = 3.9791e-01, PNorm = 59.5515, GNorm = 1.2929, lr_0 = 3.6092e-04
Loss = 3.8070e-01, PNorm = 59.5576, GNorm = 1.2107, lr_0 = 3.6067e-04
Loss = 4.0838e-01, PNorm = 59.5601, GNorm = 1.5406, lr_0 = 3.6043e-04
Loss = 4.3528e-01, PNorm = 59.5677, GNorm = 1.8146, lr_0 = 3.6018e-04
Loss = 3.7578e-01, PNorm = 59.5756, GNorm = 1.3160, lr_0 = 3.5993e-04
Loss = 3.7925e-01, PNorm = 59.5791, GNorm = 2.4617, lr_0 = 3.5969e-04
Loss = 3.6739e-01, PNorm = 59.5835, GNorm = 1.5983, lr_0 = 3.5944e-04
Loss = 4.2520e-01, PNorm = 59.5911, GNorm = 1.3145, lr_0 = 3.5919e-04
Loss = 3.8208e-01, PNorm = 59.5938, GNorm = 1.2594, lr_0 = 3.5895e-04
Loss = 4.1051e-01, PNorm = 59.6004, GNorm = 1.7320, lr_0 = 3.5870e-04
Loss = 4.1640e-01, PNorm = 59.6057, GNorm = 2.0464, lr_0 = 3.5845e-04
Loss = 3.3954e-01, PNorm = 59.6104, GNorm = 1.1917, lr_0 = 3.5821e-04
Loss = 3.8725e-01, PNorm = 59.6132, GNorm = 1.2407, lr_0 = 3.5796e-04
Loss = 4.1316e-01, PNorm = 59.6204, GNorm = 1.4425, lr_0 = 3.5772e-04
Loss = 4.3006e-01, PNorm = 59.6222, GNorm = 1.7637, lr_0 = 3.5747e-04
Loss = 3.9981e-01, PNorm = 59.6239, GNorm = 1.8105, lr_0 = 3.5723e-04
Loss = 4.5785e-01, PNorm = 59.6255, GNorm = 1.4072, lr_0 = 3.5698e-04
Loss = 3.9079e-01, PNorm = 59.6280, GNorm = 1.6296, lr_0 = 3.5674e-04
Loss = 3.6247e-01, PNorm = 59.6340, GNorm = 0.9924, lr_0 = 3.5650e-04
Loss = 3.5609e-01, PNorm = 59.6417, GNorm = 0.9360, lr_0 = 3.5625e-04
Loss = 3.5202e-01, PNorm = 59.6451, GNorm = 1.6690, lr_0 = 3.5601e-04
Loss = 3.9721e-01, PNorm = 59.6505, GNorm = 1.6794, lr_0 = 3.5576e-04
Loss = 3.6949e-01, PNorm = 59.6531, GNorm = 1.4539, lr_0 = 3.5552e-04
Loss = 4.0735e-01, PNorm = 59.6541, GNorm = 1.3555, lr_0 = 3.5528e-04
Loss = 3.5926e-01, PNorm = 59.6598, GNorm = 1.7772, lr_0 = 3.5503e-04
Loss = 3.6643e-01, PNorm = 59.6669, GNorm = 1.2494, lr_0 = 3.5479e-04
Loss = 3.7316e-01, PNorm = 59.6707, GNorm = 1.7786, lr_0 = 3.5455e-04
Loss = 3.3908e-01, PNorm = 59.6814, GNorm = 1.2216, lr_0 = 3.5430e-04
Loss = 3.6473e-01, PNorm = 59.6883, GNorm = 1.1988, lr_0 = 3.5406e-04
Loss = 4.1467e-01, PNorm = 59.6948, GNorm = 2.2122, lr_0 = 3.5382e-04
Loss = 4.0143e-01, PNorm = 59.7032, GNorm = 1.9655, lr_0 = 3.5358e-04
Loss = 3.9318e-01, PNorm = 59.7131, GNorm = 1.5922, lr_0 = 3.5333e-04
Loss = 3.9872e-01, PNorm = 59.7178, GNorm = 1.8581, lr_0 = 3.5309e-04
Loss = 4.1063e-01, PNorm = 59.7168, GNorm = 1.2971, lr_0 = 3.5285e-04
Loss = 4.2547e-01, PNorm = 59.7214, GNorm = 1.2612, lr_0 = 3.5261e-04
Loss = 3.8836e-01, PNorm = 59.7271, GNorm = 1.5069, lr_0 = 3.5237e-04
Loss = 3.7206e-01, PNorm = 59.7296, GNorm = 1.2472, lr_0 = 3.5212e-04
Loss = 3.7922e-01, PNorm = 59.7360, GNorm = 1.2403, lr_0 = 3.5188e-04
Loss = 3.7843e-01, PNorm = 59.7396, GNorm = 1.5018, lr_0 = 3.5164e-04
Loss = 3.8529e-01, PNorm = 59.7437, GNorm = 2.2584, lr_0 = 3.5140e-04
Loss = 4.3197e-01, PNorm = 59.7462, GNorm = 1.4264, lr_0 = 3.5116e-04
Loss = 4.3171e-01, PNorm = 59.7547, GNorm = 1.3582, lr_0 = 3.5092e-04
Loss = 3.7490e-01, PNorm = 59.7605, GNorm = 1.5859, lr_0 = 3.5068e-04
Loss = 3.8650e-01, PNorm = 59.7623, GNorm = 1.1603, lr_0 = 3.5044e-04
Loss = 3.5224e-01, PNorm = 59.7683, GNorm = 1.4060, lr_0 = 3.5020e-04
Loss = 3.9097e-01, PNorm = 59.7711, GNorm = 1.6529, lr_0 = 3.4996e-04
Loss = 3.8452e-01, PNorm = 59.7774, GNorm = 1.6407, lr_0 = 3.4972e-04
Loss = 4.0664e-01, PNorm = 59.7854, GNorm = 1.2264, lr_0 = 3.4948e-04
Loss = 3.4561e-01, PNorm = 59.7922, GNorm = 1.6275, lr_0 = 3.4924e-04
Loss = 3.6187e-01, PNorm = 59.7981, GNorm = 1.6589, lr_0 = 3.4900e-04
Loss = 3.6298e-01, PNorm = 59.8070, GNorm = 1.8147, lr_0 = 3.4876e-04
Loss = 4.1684e-01, PNorm = 59.8060, GNorm = 0.9697, lr_0 = 3.4852e-04
Loss = 3.4863e-01, PNorm = 59.8081, GNorm = 1.0161, lr_0 = 3.4828e-04
Loss = 3.9278e-01, PNorm = 59.8105, GNorm = 1.3401, lr_0 = 3.4805e-04
Loss = 3.8029e-01, PNorm = 59.8178, GNorm = 1.9203, lr_0 = 3.4781e-04
Loss = 4.2858e-01, PNorm = 59.8244, GNorm = 1.2562, lr_0 = 3.4757e-04
Loss = 4.2359e-01, PNorm = 59.8308, GNorm = 1.2655, lr_0 = 3.4733e-04
Loss = 3.6658e-01, PNorm = 59.8324, GNorm = 1.0562, lr_0 = 3.4709e-04
Loss = 4.0839e-01, PNorm = 59.8334, GNorm = 1.9272, lr_0 = 3.4686e-04
Loss = 4.0342e-01, PNorm = 59.8350, GNorm = 1.1592, lr_0 = 3.4662e-04
Loss = 3.7241e-01, PNorm = 59.8386, GNorm = 1.6543, lr_0 = 3.4638e-04
Loss = 4.1213e-01, PNorm = 59.8441, GNorm = 1.7387, lr_0 = 3.4614e-04
Loss = 4.0378e-01, PNorm = 59.8507, GNorm = 1.2869, lr_0 = 3.4591e-04
Loss = 3.7486e-01, PNorm = 59.8565, GNorm = 1.3441, lr_0 = 3.4567e-04
Loss = 3.6103e-01, PNorm = 59.8590, GNorm = 1.3434, lr_0 = 3.4543e-04
Loss = 3.8673e-01, PNorm = 59.8636, GNorm = 1.1537, lr_0 = 3.4520e-04
Loss = 3.9912e-01, PNorm = 59.8648, GNorm = 2.1999, lr_0 = 3.4496e-04
Loss = 4.3103e-01, PNorm = 59.8694, GNorm = 1.2992, lr_0 = 3.4472e-04
Loss = 3.7160e-01, PNorm = 59.8700, GNorm = 1.0730, lr_0 = 3.4449e-04
Loss = 3.8616e-01, PNorm = 59.8742, GNorm = 1.3324, lr_0 = 3.4425e-04
Loss = 3.7594e-01, PNorm = 59.8764, GNorm = 1.2031, lr_0 = 3.4402e-04
Loss = 4.1721e-01, PNorm = 59.8802, GNorm = 1.0587, lr_0 = 3.4378e-04
Loss = 3.7708e-01, PNorm = 59.8840, GNorm = 1.0544, lr_0 = 3.4354e-04
Loss = 3.8394e-01, PNorm = 59.8866, GNorm = 0.9360, lr_0 = 3.4331e-04
Validation mae = 0.113363
Epoch 15
Loss = 2.8211e-01, PNorm = 59.8930, GNorm = 1.3583, lr_0 = 3.4307e-04
Loss = 3.6910e-01, PNorm = 59.9006, GNorm = 1.0943, lr_0 = 3.4284e-04
Loss = 4.0320e-01, PNorm = 59.9072, GNorm = 1.3742, lr_0 = 3.4260e-04
Loss = 3.5519e-01, PNorm = 59.9113, GNorm = 1.1983, lr_0 = 3.4237e-04
Loss = 4.0842e-01, PNorm = 59.9179, GNorm = 1.1205, lr_0 = 3.4213e-04
Loss = 4.2267e-01, PNorm = 59.9278, GNorm = 1.1785, lr_0 = 3.4190e-04
Loss = 3.7752e-01, PNorm = 59.9332, GNorm = 1.7753, lr_0 = 3.4167e-04
Loss = 3.9876e-01, PNorm = 59.9385, GNorm = 1.4419, lr_0 = 3.4143e-04
Loss = 3.6308e-01, PNorm = 59.9397, GNorm = 1.4820, lr_0 = 3.4120e-04
Loss = 3.3850e-01, PNorm = 59.9452, GNorm = 1.5727, lr_0 = 3.4096e-04
Loss = 3.5931e-01, PNorm = 59.9506, GNorm = 0.9921, lr_0 = 3.4073e-04
Loss = 3.6765e-01, PNorm = 59.9580, GNorm = 1.2319, lr_0 = 3.4050e-04
Loss = 4.1450e-01, PNorm = 59.9606, GNorm = 1.6613, lr_0 = 3.4026e-04
Loss = 3.7837e-01, PNorm = 59.9675, GNorm = 1.7324, lr_0 = 3.4003e-04
Loss = 3.5935e-01, PNorm = 59.9731, GNorm = 1.4265, lr_0 = 3.3980e-04
Loss = 3.4350e-01, PNorm = 59.9785, GNorm = 1.7360, lr_0 = 3.3956e-04
Loss = 3.4286e-01, PNorm = 59.9827, GNorm = 1.2860, lr_0 = 3.3933e-04
Loss = 3.5805e-01, PNorm = 59.9837, GNorm = 0.9648, lr_0 = 3.3910e-04
Loss = 3.9137e-01, PNorm = 59.9886, GNorm = 1.1863, lr_0 = 3.3887e-04
Loss = 4.2309e-01, PNorm = 59.9929, GNorm = 1.0511, lr_0 = 3.3864e-04
Loss = 4.0452e-01, PNorm = 59.9992, GNorm = 1.3462, lr_0 = 3.3840e-04
Loss = 3.8094e-01, PNorm = 60.0083, GNorm = 2.5950, lr_0 = 3.3817e-04
Loss = 3.7105e-01, PNorm = 60.0090, GNorm = 1.6759, lr_0 = 3.3794e-04
Loss = 5.1386e-01, PNorm = 60.0101, GNorm = 1.7648, lr_0 = 3.3771e-04
Loss = 3.8178e-01, PNorm = 60.0137, GNorm = 1.4853, lr_0 = 3.3748e-04
Loss = 4.0646e-01, PNorm = 60.0108, GNorm = 1.8239, lr_0 = 3.3725e-04
Loss = 4.1522e-01, PNorm = 60.0149, GNorm = 0.8564, lr_0 = 3.3701e-04
Loss = 4.1216e-01, PNorm = 60.0196, GNorm = 1.3407, lr_0 = 3.3678e-04
Loss = 3.8497e-01, PNorm = 60.0228, GNorm = 0.9831, lr_0 = 3.3655e-04
Loss = 3.9781e-01, PNorm = 60.0250, GNorm = 1.2716, lr_0 = 3.3632e-04
Loss = 3.5574e-01, PNorm = 60.0337, GNorm = 1.3112, lr_0 = 3.3609e-04
Loss = 4.5428e-01, PNorm = 60.0389, GNorm = 1.7926, lr_0 = 3.3586e-04
Loss = 3.7734e-01, PNorm = 60.0440, GNorm = 1.3101, lr_0 = 3.3563e-04
Loss = 3.8186e-01, PNorm = 60.0432, GNorm = 1.8834, lr_0 = 3.3540e-04
Loss = 3.9515e-01, PNorm = 60.0495, GNorm = 1.7476, lr_0 = 3.3517e-04
Loss = 3.9819e-01, PNorm = 60.0563, GNorm = 1.0507, lr_0 = 3.3494e-04
Loss = 3.4700e-01, PNorm = 60.0659, GNorm = 1.3041, lr_0 = 3.3471e-04
Loss = 4.3138e-01, PNorm = 60.0710, GNorm = 1.2261, lr_0 = 3.3448e-04
Loss = 3.7250e-01, PNorm = 60.0749, GNorm = 1.2738, lr_0 = 3.3425e-04
Loss = 3.6109e-01, PNorm = 60.0794, GNorm = 1.0942, lr_0 = 3.3403e-04
Loss = 3.9302e-01, PNorm = 60.0839, GNorm = 1.1086, lr_0 = 3.3380e-04
Loss = 3.6201e-01, PNorm = 60.0895, GNorm = 1.7150, lr_0 = 3.3357e-04
Loss = 3.5544e-01, PNorm = 60.0930, GNorm = 1.3083, lr_0 = 3.3334e-04
Loss = 4.0431e-01, PNorm = 60.0969, GNorm = 1.2371, lr_0 = 3.3311e-04
Loss = 3.8927e-01, PNorm = 60.1051, GNorm = 2.0698, lr_0 = 3.3288e-04
Loss = 4.2024e-01, PNorm = 60.1047, GNorm = 1.0648, lr_0 = 3.3265e-04
Loss = 4.0370e-01, PNorm = 60.1128, GNorm = 2.0052, lr_0 = 3.3243e-04
Loss = 3.9081e-01, PNorm = 60.1167, GNorm = 1.2898, lr_0 = 3.3220e-04
Loss = 3.5912e-01, PNorm = 60.1206, GNorm = 1.1776, lr_0 = 3.3197e-04
Loss = 3.4269e-01, PNorm = 60.1257, GNorm = 1.1097, lr_0 = 3.3174e-04
Loss = 3.7888e-01, PNorm = 60.1309, GNorm = 1.4021, lr_0 = 3.3152e-04
Loss = 3.5479e-01, PNorm = 60.1344, GNorm = 1.6357, lr_0 = 3.3129e-04
Loss = 3.5588e-01, PNorm = 60.1354, GNorm = 1.3660, lr_0 = 3.3106e-04
Loss = 4.2998e-01, PNorm = 60.1409, GNorm = 1.6033, lr_0 = 3.3084e-04
Loss = 3.7653e-01, PNorm = 60.1425, GNorm = 1.9493, lr_0 = 3.3061e-04
Loss = 3.8687e-01, PNorm = 60.1446, GNorm = 1.2303, lr_0 = 3.3038e-04
Loss = 4.4459e-01, PNorm = 60.1493, GNorm = 2.1561, lr_0 = 3.3016e-04
Loss = 4.1556e-01, PNorm = 60.1538, GNorm = 1.0945, lr_0 = 3.2993e-04
Loss = 3.7889e-01, PNorm = 60.1551, GNorm = 1.5298, lr_0 = 3.2970e-04
Loss = 3.8257e-01, PNorm = 60.1608, GNorm = 1.3202, lr_0 = 3.2948e-04
Loss = 3.9528e-01, PNorm = 60.1656, GNorm = 1.4191, lr_0 = 3.2925e-04
Loss = 3.6193e-01, PNorm = 60.1691, GNorm = 1.2150, lr_0 = 3.2903e-04
Loss = 4.6688e-01, PNorm = 60.1792, GNorm = 1.3605, lr_0 = 3.2880e-04
Loss = 3.7198e-01, PNorm = 60.1807, GNorm = 1.5016, lr_0 = 3.2858e-04
Loss = 4.4275e-01, PNorm = 60.1885, GNorm = 1.1643, lr_0 = 3.2835e-04
Loss = 4.0036e-01, PNorm = 60.1988, GNorm = 1.7512, lr_0 = 3.2813e-04
Loss = 4.0709e-01, PNorm = 60.2044, GNorm = 1.5481, lr_0 = 3.2790e-04
Loss = 3.7034e-01, PNorm = 60.2080, GNorm = 1.4037, lr_0 = 3.2768e-04
Loss = 3.9321e-01, PNorm = 60.2114, GNorm = 1.5166, lr_0 = 3.2745e-04
Loss = 3.6927e-01, PNorm = 60.2191, GNorm = 1.1933, lr_0 = 3.2723e-04
Loss = 3.9840e-01, PNorm = 60.2233, GNorm = 1.1845, lr_0 = 3.2700e-04
Loss = 4.4698e-01, PNorm = 60.2291, GNorm = 2.2087, lr_0 = 3.2678e-04
Loss = 3.6751e-01, PNorm = 60.2329, GNorm = 1.1922, lr_0 = 3.2656e-04
Loss = 3.7468e-01, PNorm = 60.2356, GNorm = 1.0916, lr_0 = 3.2633e-04
Loss = 3.6563e-01, PNorm = 60.2396, GNorm = 1.1191, lr_0 = 3.2611e-04
Loss = 3.7478e-01, PNorm = 60.2428, GNorm = 1.2660, lr_0 = 3.2589e-04
Loss = 3.4460e-01, PNorm = 60.2504, GNorm = 1.4417, lr_0 = 3.2566e-04
Loss = 3.6684e-01, PNorm = 60.2545, GNorm = 1.2427, lr_0 = 3.2544e-04
Loss = 3.6854e-01, PNorm = 60.2559, GNorm = 1.1755, lr_0 = 3.2522e-04
Loss = 4.2000e-01, PNorm = 60.2583, GNorm = 1.3873, lr_0 = 3.2499e-04
Loss = 3.5464e-01, PNorm = 60.2597, GNorm = 1.0564, lr_0 = 3.2477e-04
Loss = 3.5100e-01, PNorm = 60.2653, GNorm = 1.1212, lr_0 = 3.2455e-04
Loss = 3.9703e-01, PNorm = 60.2692, GNorm = 1.4148, lr_0 = 3.2433e-04
Loss = 3.6072e-01, PNorm = 60.2726, GNorm = 1.4923, lr_0 = 3.2410e-04
Loss = 3.9187e-01, PNorm = 60.2766, GNorm = 1.3686, lr_0 = 3.2388e-04
Loss = 3.2807e-01, PNorm = 60.2808, GNorm = 1.1787, lr_0 = 3.2366e-04
Loss = 3.8929e-01, PNorm = 60.2834, GNorm = 1.8115, lr_0 = 3.2344e-04
Loss = 4.2128e-01, PNorm = 60.2874, GNorm = 1.6508, lr_0 = 3.2322e-04
Loss = 3.5607e-01, PNorm = 60.2924, GNorm = 1.7494, lr_0 = 3.2300e-04
Loss = 3.8974e-01, PNorm = 60.2979, GNorm = 1.1366, lr_0 = 3.2277e-04
Loss = 3.5782e-01, PNorm = 60.3040, GNorm = 0.9953, lr_0 = 3.2255e-04
Loss = 3.9754e-01, PNorm = 60.3072, GNorm = 1.1464, lr_0 = 3.2233e-04
Loss = 3.8294e-01, PNorm = 60.3104, GNorm = 1.0025, lr_0 = 3.2211e-04
Loss = 4.4012e-01, PNorm = 60.3123, GNorm = 1.1160, lr_0 = 3.2189e-04
Loss = 3.8790e-01, PNorm = 60.3178, GNorm = 1.7900, lr_0 = 3.2167e-04
Loss = 3.6149e-01, PNorm = 60.3227, GNorm = 0.9657, lr_0 = 3.2145e-04
Loss = 4.0679e-01, PNorm = 60.3256, GNorm = 1.0758, lr_0 = 3.2123e-04
Loss = 3.6698e-01, PNorm = 60.3288, GNorm = 1.6774, lr_0 = 3.2101e-04
Loss = 3.6246e-01, PNorm = 60.3339, GNorm = 0.8727, lr_0 = 3.2079e-04
Loss = 3.7408e-01, PNorm = 60.3408, GNorm = 1.4682, lr_0 = 3.2057e-04
Loss = 4.1485e-01, PNorm = 60.3477, GNorm = 1.4260, lr_0 = 3.2035e-04
Loss = 4.2968e-01, PNorm = 60.3559, GNorm = 1.6097, lr_0 = 3.2013e-04
Loss = 3.7552e-01, PNorm = 60.3576, GNorm = 1.1878, lr_0 = 3.1991e-04
Loss = 4.7044e-01, PNorm = 60.3618, GNorm = 2.3750, lr_0 = 3.1969e-04
Loss = 3.7976e-01, PNorm = 60.3688, GNorm = 1.4705, lr_0 = 3.1947e-04
Loss = 3.6928e-01, PNorm = 60.3707, GNorm = 1.3400, lr_0 = 3.1925e-04
Loss = 3.8247e-01, PNorm = 60.3784, GNorm = 1.3784, lr_0 = 3.1904e-04
Loss = 3.7834e-01, PNorm = 60.3821, GNorm = 1.3037, lr_0 = 3.1882e-04
Loss = 3.8227e-01, PNorm = 60.3842, GNorm = 1.1101, lr_0 = 3.1860e-04
Loss = 3.5134e-01, PNorm = 60.3890, GNorm = 1.2664, lr_0 = 3.1838e-04
Loss = 3.5547e-01, PNorm = 60.3905, GNorm = 1.1657, lr_0 = 3.1816e-04
Loss = 4.1467e-01, PNorm = 60.3955, GNorm = 1.0874, lr_0 = 3.1794e-04
Loss = 3.6833e-01, PNorm = 60.4010, GNorm = 1.5143, lr_0 = 3.1773e-04
Loss = 4.0000e-01, PNorm = 60.4048, GNorm = 1.5784, lr_0 = 3.1751e-04
Loss = 4.3295e-01, PNorm = 60.4118, GNorm = 1.2508, lr_0 = 3.1729e-04
Loss = 3.7554e-01, PNorm = 60.4187, GNorm = 1.4673, lr_0 = 3.1707e-04
Loss = 4.2170e-01, PNorm = 60.4207, GNorm = 1.1622, lr_0 = 3.1686e-04
Loss = 3.4645e-01, PNorm = 60.4270, GNorm = 1.5788, lr_0 = 3.1664e-04
Loss = 4.7429e-01, PNorm = 60.4306, GNorm = 1.9105, lr_0 = 3.1642e-04
Loss = 3.5396e-01, PNorm = 60.4324, GNorm = 0.9411, lr_0 = 3.1621e-04
Validation mae = 0.113734
Epoch 16
Loss = 3.4276e-01, PNorm = 60.4378, GNorm = 1.8198, lr_0 = 3.1599e-04
Loss = 3.8856e-01, PNorm = 60.4431, GNorm = 1.4156, lr_0 = 3.1577e-04
Loss = 4.5135e-01, PNorm = 60.4483, GNorm = 2.3811, lr_0 = 3.1556e-04
Loss = 4.1757e-01, PNorm = 60.4550, GNorm = 1.2411, lr_0 = 3.1534e-04
Loss = 3.9456e-01, PNorm = 60.4608, GNorm = 1.5475, lr_0 = 3.1512e-04
Loss = 3.7386e-01, PNorm = 60.4644, GNorm = 1.5149, lr_0 = 3.1491e-04
Loss = 4.0742e-01, PNorm = 60.4689, GNorm = 1.2359, lr_0 = 3.1469e-04
Loss = 3.8059e-01, PNorm = 60.4693, GNorm = 1.4026, lr_0 = 3.1448e-04
Loss = 4.2530e-01, PNorm = 60.4711, GNorm = 1.8145, lr_0 = 3.1426e-04
Loss = 3.6352e-01, PNorm = 60.4716, GNorm = 1.2627, lr_0 = 3.1405e-04
Loss = 3.4573e-01, PNorm = 60.4737, GNorm = 1.2259, lr_0 = 3.1383e-04
Loss = 3.7772e-01, PNorm = 60.4797, GNorm = 1.4572, lr_0 = 3.1362e-04
Loss = 3.6984e-01, PNorm = 60.4870, GNorm = 1.3101, lr_0 = 3.1340e-04
Loss = 3.6017e-01, PNorm = 60.4914, GNorm = 1.5092, lr_0 = 3.1319e-04
Loss = 3.7488e-01, PNorm = 60.4912, GNorm = 2.0296, lr_0 = 3.1297e-04
Loss = 3.6841e-01, PNorm = 60.4967, GNorm = 1.0756, lr_0 = 3.1276e-04
Loss = 4.0756e-01, PNorm = 60.5043, GNorm = 1.3147, lr_0 = 3.1254e-04
Loss = 3.8679e-01, PNorm = 60.5120, GNorm = 1.7494, lr_0 = 3.1233e-04
Loss = 3.6232e-01, PNorm = 60.5209, GNorm = 1.2421, lr_0 = 3.1212e-04
Loss = 3.6355e-01, PNorm = 60.5240, GNorm = 1.6396, lr_0 = 3.1190e-04
Loss = 3.3518e-01, PNorm = 60.5287, GNorm = 1.3881, lr_0 = 3.1169e-04
Loss = 3.6226e-01, PNorm = 60.5336, GNorm = 1.3605, lr_0 = 3.1147e-04
Loss = 4.2400e-01, PNorm = 60.5399, GNorm = 1.1776, lr_0 = 3.1126e-04
Loss = 4.1640e-01, PNorm = 60.5469, GNorm = 1.6614, lr_0 = 3.1105e-04
Loss = 3.5746e-01, PNorm = 60.5490, GNorm = 1.5724, lr_0 = 3.1083e-04
Loss = 3.7048e-01, PNorm = 60.5504, GNorm = 1.2329, lr_0 = 3.1062e-04
Loss = 3.7232e-01, PNorm = 60.5536, GNorm = 1.1349, lr_0 = 3.1041e-04
Loss = 3.3609e-01, PNorm = 60.5586, GNorm = 1.3855, lr_0 = 3.1020e-04
Loss = 3.6338e-01, PNorm = 60.5615, GNorm = 1.7001, lr_0 = 3.0998e-04
Loss = 4.3977e-01, PNorm = 60.5675, GNorm = 1.6213, lr_0 = 3.0977e-04
Loss = 3.7240e-01, PNorm = 60.5730, GNorm = 1.6869, lr_0 = 3.0956e-04
Loss = 3.8350e-01, PNorm = 60.5743, GNorm = 1.1099, lr_0 = 3.0935e-04
Loss = 3.6856e-01, PNorm = 60.5746, GNorm = 1.7406, lr_0 = 3.0914e-04
Loss = 3.6192e-01, PNorm = 60.5826, GNorm = 1.4266, lr_0 = 3.0892e-04
Loss = 3.8409e-01, PNorm = 60.5864, GNorm = 1.2650, lr_0 = 3.0871e-04
Loss = 3.7030e-01, PNorm = 60.5885, GNorm = 1.2141, lr_0 = 3.0850e-04
Loss = 3.9425e-01, PNorm = 60.5920, GNorm = 2.3186, lr_0 = 3.0829e-04
Loss = 3.7156e-01, PNorm = 60.5959, GNorm = 1.6850, lr_0 = 3.0808e-04
Loss = 3.7976e-01, PNorm = 60.6012, GNorm = 1.7967, lr_0 = 3.0787e-04
Loss = 4.4540e-01, PNorm = 60.6011, GNorm = 1.5441, lr_0 = 3.0766e-04
Loss = 3.6497e-01, PNorm = 60.6047, GNorm = 1.5520, lr_0 = 3.0745e-04
Loss = 3.6596e-01, PNorm = 60.6103, GNorm = 1.9476, lr_0 = 3.0723e-04
Loss = 4.1307e-01, PNorm = 60.6152, GNorm = 1.5263, lr_0 = 3.0702e-04
Loss = 3.9529e-01, PNorm = 60.6177, GNorm = 1.3976, lr_0 = 3.0681e-04
Loss = 3.3596e-01, PNorm = 60.6204, GNorm = 1.4157, lr_0 = 3.0660e-04
Loss = 3.5352e-01, PNorm = 60.6228, GNorm = 1.3130, lr_0 = 3.0639e-04
Loss = 3.6223e-01, PNorm = 60.6272, GNorm = 1.4242, lr_0 = 3.0618e-04
Loss = 4.2297e-01, PNorm = 60.6300, GNorm = 1.4945, lr_0 = 3.0597e-04
Loss = 3.5790e-01, PNorm = 60.6369, GNorm = 1.3254, lr_0 = 3.0576e-04
Loss = 3.9357e-01, PNorm = 60.6403, GNorm = 1.9591, lr_0 = 3.0555e-04
Loss = 3.6198e-01, PNorm = 60.6406, GNorm = 1.3380, lr_0 = 3.0535e-04
Loss = 4.0072e-01, PNorm = 60.6393, GNorm = 0.9939, lr_0 = 3.0514e-04
Loss = 4.0234e-01, PNorm = 60.6453, GNorm = 1.3498, lr_0 = 3.0493e-04
Loss = 3.4126e-01, PNorm = 60.6544, GNorm = 1.6657, lr_0 = 3.0472e-04
Loss = 3.6790e-01, PNorm = 60.6605, GNorm = 1.7499, lr_0 = 3.0451e-04
Loss = 3.7696e-01, PNorm = 60.6635, GNorm = 1.0006, lr_0 = 3.0430e-04
Loss = 3.9245e-01, PNorm = 60.6697, GNorm = 1.3442, lr_0 = 3.0409e-04
Loss = 3.9399e-01, PNorm = 60.6738, GNorm = 1.4694, lr_0 = 3.0388e-04
Loss = 4.2759e-01, PNorm = 60.6717, GNorm = 2.1499, lr_0 = 3.0368e-04
Loss = 3.8269e-01, PNorm = 60.6773, GNorm = 1.1650, lr_0 = 3.0347e-04
Loss = 3.4941e-01, PNorm = 60.6788, GNorm = 1.3740, lr_0 = 3.0326e-04
Loss = 3.7593e-01, PNorm = 60.6846, GNorm = 0.8977, lr_0 = 3.0305e-04
Loss = 3.6437e-01, PNorm = 60.6878, GNorm = 1.6759, lr_0 = 3.0284e-04
Loss = 3.6562e-01, PNorm = 60.6921, GNorm = 1.3346, lr_0 = 3.0264e-04
Loss = 3.4421e-01, PNorm = 60.6987, GNorm = 1.4079, lr_0 = 3.0243e-04
Loss = 3.8148e-01, PNorm = 60.7013, GNorm = 1.6894, lr_0 = 3.0222e-04
Loss = 3.9989e-01, PNorm = 60.7060, GNorm = 1.3821, lr_0 = 3.0202e-04
Loss = 3.6755e-01, PNorm = 60.7078, GNorm = 1.2158, lr_0 = 3.0181e-04
Loss = 4.7075e-01, PNorm = 60.7126, GNorm = 1.6187, lr_0 = 3.0160e-04
Loss = 4.1406e-01, PNorm = 60.7157, GNorm = 1.5949, lr_0 = 3.0140e-04
Loss = 3.6463e-01, PNorm = 60.7176, GNorm = 2.1001, lr_0 = 3.0119e-04
Loss = 3.9021e-01, PNorm = 60.7203, GNorm = 1.0562, lr_0 = 3.0098e-04
Loss = 3.9058e-01, PNorm = 60.7233, GNorm = 1.1129, lr_0 = 3.0078e-04
Loss = 4.1986e-01, PNorm = 60.7288, GNorm = 1.5951, lr_0 = 3.0057e-04
Loss = 4.0726e-01, PNorm = 60.7359, GNorm = 1.4525, lr_0 = 3.0036e-04
Loss = 3.5666e-01, PNorm = 60.7396, GNorm = 1.3819, lr_0 = 3.0016e-04
Loss = 3.3813e-01, PNorm = 60.7430, GNorm = 1.5957, lr_0 = 2.9995e-04
Loss = 3.3495e-01, PNorm = 60.7471, GNorm = 1.2159, lr_0 = 2.9975e-04
Loss = 3.8100e-01, PNorm = 60.7454, GNorm = 1.5055, lr_0 = 2.9954e-04
Loss = 3.6551e-01, PNorm = 60.7495, GNorm = 1.2510, lr_0 = 2.9934e-04
Loss = 3.5291e-01, PNorm = 60.7530, GNorm = 1.3128, lr_0 = 2.9913e-04
Loss = 3.5567e-01, PNorm = 60.7611, GNorm = 1.3227, lr_0 = 2.9893e-04
Loss = 4.0263e-01, PNorm = 60.7650, GNorm = 0.9329, lr_0 = 2.9872e-04
Loss = 3.8297e-01, PNorm = 60.7717, GNorm = 1.8221, lr_0 = 2.9852e-04
Loss = 3.3453e-01, PNorm = 60.7772, GNorm = 1.6920, lr_0 = 2.9831e-04
Loss = 3.9621e-01, PNorm = 60.7765, GNorm = 1.1942, lr_0 = 2.9811e-04
Loss = 4.2518e-01, PNorm = 60.7762, GNorm = 1.8570, lr_0 = 2.9790e-04
Loss = 3.8353e-01, PNorm = 60.7783, GNorm = 2.0459, lr_0 = 2.9770e-04
Loss = 3.4582e-01, PNorm = 60.7843, GNorm = 1.0846, lr_0 = 2.9750e-04
Loss = 3.5295e-01, PNorm = 60.7845, GNorm = 1.8628, lr_0 = 2.9729e-04
Loss = 4.1859e-01, PNorm = 60.7899, GNorm = 1.7573, lr_0 = 2.9709e-04
Loss = 3.7293e-01, PNorm = 60.7936, GNorm = 1.4410, lr_0 = 2.9689e-04
Loss = 3.6756e-01, PNorm = 60.7969, GNorm = 0.9487, lr_0 = 2.9668e-04
Loss = 3.7278e-01, PNorm = 60.8019, GNorm = 1.5563, lr_0 = 2.9648e-04
Loss = 3.4314e-01, PNorm = 60.8057, GNorm = 0.9939, lr_0 = 2.9628e-04
Loss = 3.7896e-01, PNorm = 60.8096, GNorm = 1.1007, lr_0 = 2.9607e-04
Loss = 3.9867e-01, PNorm = 60.8086, GNorm = 2.0405, lr_0 = 2.9587e-04
Loss = 3.8621e-01, PNorm = 60.8141, GNorm = 1.4662, lr_0 = 2.9567e-04
Loss = 3.8235e-01, PNorm = 60.8181, GNorm = 1.4694, lr_0 = 2.9546e-04
Loss = 3.8521e-01, PNorm = 60.8231, GNorm = 1.4528, lr_0 = 2.9526e-04
Loss = 3.2677e-01, PNorm = 60.8241, GNorm = 1.8684, lr_0 = 2.9506e-04
Loss = 4.0312e-01, PNorm = 60.8250, GNorm = 1.4387, lr_0 = 2.9486e-04
Loss = 3.6387e-01, PNorm = 60.8257, GNorm = 0.8807, lr_0 = 2.9466e-04
Loss = 4.1003e-01, PNorm = 60.8324, GNorm = 1.2610, lr_0 = 2.9445e-04
Loss = 3.6905e-01, PNorm = 60.8381, GNorm = 0.9671, lr_0 = 2.9425e-04
Loss = 3.8009e-01, PNorm = 60.8362, GNorm = 1.6319, lr_0 = 2.9405e-04
Loss = 4.3585e-01, PNorm = 60.8404, GNorm = 1.0702, lr_0 = 2.9385e-04
Loss = 3.6241e-01, PNorm = 60.8466, GNorm = 1.0701, lr_0 = 2.9365e-04
Loss = 3.8279e-01, PNorm = 60.8484, GNorm = 1.4856, lr_0 = 2.9345e-04
Loss = 4.4112e-01, PNorm = 60.8496, GNorm = 1.9244, lr_0 = 2.9325e-04
Loss = 3.9012e-01, PNorm = 60.8522, GNorm = 1.7197, lr_0 = 2.9305e-04
Loss = 3.7409e-01, PNorm = 60.8550, GNorm = 1.5943, lr_0 = 2.9284e-04
Loss = 3.8274e-01, PNorm = 60.8607, GNorm = 1.1523, lr_0 = 2.9264e-04
Loss = 3.6955e-01, PNorm = 60.8657, GNorm = 1.2219, lr_0 = 2.9244e-04
Loss = 4.1896e-01, PNorm = 60.8682, GNorm = 1.6798, lr_0 = 2.9224e-04
Loss = 3.4165e-01, PNorm = 60.8739, GNorm = 1.2407, lr_0 = 2.9204e-04
Loss = 4.2878e-01, PNorm = 60.8741, GNorm = 1.1811, lr_0 = 2.9184e-04
Loss = 3.7070e-01, PNorm = 60.8764, GNorm = 1.3040, lr_0 = 2.9164e-04
Loss = 3.6706e-01, PNorm = 60.8826, GNorm = 1.6084, lr_0 = 2.9144e-04
Loss = 3.3163e-01, PNorm = 60.8856, GNorm = 1.6014, lr_0 = 2.9124e-04
Validation mae = 0.113105
Epoch 17
Loss = 3.5806e-01, PNorm = 60.8868, GNorm = 1.2988, lr_0 = 2.9104e-04
Loss = 3.3020e-01, PNorm = 60.8894, GNorm = 1.3785, lr_0 = 2.9084e-04
Loss = 3.6080e-01, PNorm = 60.8965, GNorm = 1.2958, lr_0 = 2.9065e-04
Loss = 3.2904e-01, PNorm = 60.8997, GNorm = 1.2433, lr_0 = 2.9045e-04
Loss = 4.0395e-01, PNorm = 60.9051, GNorm = 1.8976, lr_0 = 2.9025e-04
Loss = 3.7557e-01, PNorm = 60.9122, GNorm = 1.2156, lr_0 = 2.9005e-04
Loss = 3.5757e-01, PNorm = 60.9155, GNorm = 1.6909, lr_0 = 2.8985e-04
Loss = 3.3065e-01, PNorm = 60.9129, GNorm = 1.3074, lr_0 = 2.8965e-04
Loss = 3.6868e-01, PNorm = 60.9173, GNorm = 1.7144, lr_0 = 2.8945e-04
Loss = 4.0286e-01, PNorm = 60.9221, GNorm = 1.1852, lr_0 = 2.8925e-04
Loss = 4.0763e-01, PNorm = 60.9291, GNorm = 1.9151, lr_0 = 2.8906e-04
Loss = 3.9459e-01, PNorm = 60.9356, GNorm = 1.4706, lr_0 = 2.8886e-04
Loss = 3.1023e-01, PNorm = 60.9362, GNorm = 1.1192, lr_0 = 2.8866e-04
Loss = 3.6476e-01, PNorm = 60.9373, GNorm = 1.0091, lr_0 = 2.8846e-04
Loss = 3.2235e-01, PNorm = 60.9411, GNorm = 1.5675, lr_0 = 2.8826e-04
Loss = 4.5196e-01, PNorm = 60.9442, GNorm = 2.5966, lr_0 = 2.8807e-04
Loss = 3.5975e-01, PNorm = 60.9473, GNorm = 1.2603, lr_0 = 2.8787e-04
Loss = 3.5520e-01, PNorm = 60.9466, GNorm = 1.4349, lr_0 = 2.8767e-04
Loss = 3.8564e-01, PNorm = 60.9536, GNorm = 1.4371, lr_0 = 2.8748e-04
Loss = 3.5671e-01, PNorm = 60.9550, GNorm = 1.5939, lr_0 = 2.8728e-04
Loss = 3.7579e-01, PNorm = 60.9575, GNorm = 1.3054, lr_0 = 2.8708e-04
Loss = 3.9726e-01, PNorm = 60.9613, GNorm = 1.2145, lr_0 = 2.8689e-04
Loss = 3.2369e-01, PNorm = 60.9679, GNorm = 1.1911, lr_0 = 2.8669e-04
Loss = 3.2911e-01, PNorm = 60.9728, GNorm = 1.2285, lr_0 = 2.8649e-04
Loss = 3.5677e-01, PNorm = 60.9742, GNorm = 1.2609, lr_0 = 2.8630e-04
Loss = 4.0790e-01, PNorm = 60.9790, GNorm = 1.8418, lr_0 = 2.8610e-04
Loss = 3.8506e-01, PNorm = 60.9804, GNorm = 1.3057, lr_0 = 2.8590e-04
Loss = 3.6331e-01, PNorm = 60.9835, GNorm = 0.9386, lr_0 = 2.8571e-04
Loss = 3.7657e-01, PNorm = 60.9891, GNorm = 1.0899, lr_0 = 2.8551e-04
Loss = 3.5864e-01, PNorm = 60.9935, GNorm = 1.8164, lr_0 = 2.8532e-04
Loss = 3.9230e-01, PNorm = 60.9972, GNorm = 1.4763, lr_0 = 2.8512e-04
Loss = 3.4840e-01, PNorm = 61.0025, GNorm = 1.2768, lr_0 = 2.8493e-04
Loss = 3.5992e-01, PNorm = 61.0090, GNorm = 1.4000, lr_0 = 2.8473e-04
Loss = 4.1816e-01, PNorm = 61.0099, GNorm = 1.3408, lr_0 = 2.8454e-04
Loss = 3.1599e-01, PNorm = 61.0145, GNorm = 1.4444, lr_0 = 2.8434e-04
Loss = 4.2155e-01, PNorm = 61.0157, GNorm = 1.3416, lr_0 = 2.8415e-04
Loss = 3.5649e-01, PNorm = 61.0204, GNorm = 1.2866, lr_0 = 2.8395e-04
Loss = 3.5640e-01, PNorm = 61.0248, GNorm = 1.1506, lr_0 = 2.8376e-04
Loss = 3.4006e-01, PNorm = 61.0283, GNorm = 1.1014, lr_0 = 2.8356e-04
Loss = 3.7550e-01, PNorm = 61.0289, GNorm = 1.6203, lr_0 = 2.8337e-04
Loss = 3.7279e-01, PNorm = 61.0281, GNorm = 1.4044, lr_0 = 2.8317e-04
Loss = 4.0976e-01, PNorm = 61.0316, GNorm = 1.4620, lr_0 = 2.8298e-04
Loss = 3.7175e-01, PNorm = 61.0368, GNorm = 1.5432, lr_0 = 2.8279e-04
Loss = 3.8374e-01, PNorm = 61.0410, GNorm = 1.0540, lr_0 = 2.8259e-04
Loss = 4.0746e-01, PNorm = 61.0434, GNorm = 1.4087, lr_0 = 2.8240e-04
Loss = 3.6787e-01, PNorm = 61.0486, GNorm = 1.0580, lr_0 = 2.8221e-04
Loss = 4.2793e-01, PNorm = 61.0507, GNorm = 1.8486, lr_0 = 2.8201e-04
Loss = 4.0970e-01, PNorm = 61.0591, GNorm = 1.5596, lr_0 = 2.8182e-04
Loss = 3.4161e-01, PNorm = 61.0646, GNorm = 1.5920, lr_0 = 2.8163e-04
Loss = 3.9269e-01, PNorm = 61.0631, GNorm = 1.1693, lr_0 = 2.8143e-04
Loss = 4.0923e-01, PNorm = 61.0731, GNorm = 2.0183, lr_0 = 2.8124e-04
Loss = 4.0729e-01, PNorm = 61.0747, GNorm = 1.6661, lr_0 = 2.8105e-04
Loss = 3.7302e-01, PNorm = 61.0793, GNorm = 2.1097, lr_0 = 2.8085e-04
Loss = 3.9983e-01, PNorm = 61.0799, GNorm = 1.6240, lr_0 = 2.8066e-04
Loss = 3.5875e-01, PNorm = 61.0831, GNorm = 1.4212, lr_0 = 2.8047e-04
Loss = 4.0004e-01, PNorm = 61.0868, GNorm = 1.9087, lr_0 = 2.8028e-04
Loss = 3.7607e-01, PNorm = 61.0859, GNorm = 1.2718, lr_0 = 2.8009e-04
Loss = 4.1736e-01, PNorm = 61.0880, GNorm = 1.4513, lr_0 = 2.7989e-04
Loss = 3.3167e-01, PNorm = 61.0899, GNorm = 1.5199, lr_0 = 2.7970e-04
Loss = 4.3049e-01, PNorm = 61.0904, GNorm = 1.4702, lr_0 = 2.7951e-04
Loss = 3.7812e-01, PNorm = 61.0973, GNorm = 1.4221, lr_0 = 2.7932e-04
Loss = 3.1892e-01, PNorm = 61.1038, GNorm = 1.8364, lr_0 = 2.7913e-04
Loss = 3.1491e-01, PNorm = 61.1081, GNorm = 1.0737, lr_0 = 2.7894e-04
Loss = 3.4735e-01, PNorm = 61.1126, GNorm = 1.7023, lr_0 = 2.7875e-04
Loss = 3.6263e-01, PNorm = 61.1150, GNorm = 1.1840, lr_0 = 2.7855e-04
Loss = 3.7696e-01, PNorm = 61.1191, GNorm = 0.9864, lr_0 = 2.7836e-04
Loss = 3.6358e-01, PNorm = 61.1201, GNorm = 1.5067, lr_0 = 2.7817e-04
Loss = 3.8267e-01, PNorm = 61.1230, GNorm = 1.3918, lr_0 = 2.7798e-04
Loss = 3.9875e-01, PNorm = 61.1280, GNorm = 1.6306, lr_0 = 2.7779e-04
Loss = 4.0594e-01, PNorm = 61.1335, GNorm = 0.9235, lr_0 = 2.7760e-04
Loss = 3.4511e-01, PNorm = 61.1374, GNorm = 0.8536, lr_0 = 2.7741e-04
Loss = 3.5642e-01, PNorm = 61.1417, GNorm = 1.2919, lr_0 = 2.7722e-04
Loss = 3.7053e-01, PNorm = 61.1460, GNorm = 1.8442, lr_0 = 2.7703e-04
Loss = 3.8476e-01, PNorm = 61.1440, GNorm = 1.8471, lr_0 = 2.7684e-04
Loss = 4.0187e-01, PNorm = 61.1476, GNorm = 1.1207, lr_0 = 2.7665e-04
Loss = 3.7967e-01, PNorm = 61.1503, GNorm = 1.0780, lr_0 = 2.7646e-04
Loss = 3.7415e-01, PNorm = 61.1556, GNorm = 1.1517, lr_0 = 2.7627e-04
Loss = 4.2661e-01, PNorm = 61.1628, GNorm = 1.6026, lr_0 = 2.7608e-04
Loss = 3.5231e-01, PNorm = 61.1670, GNorm = 2.1078, lr_0 = 2.7590e-04
Loss = 3.8479e-01, PNorm = 61.1697, GNorm = 1.4232, lr_0 = 2.7571e-04
Loss = 4.0452e-01, PNorm = 61.1744, GNorm = 1.3902, lr_0 = 2.7552e-04
Loss = 3.5886e-01, PNorm = 61.1769, GNorm = 1.5328, lr_0 = 2.7533e-04
Loss = 3.7197e-01, PNorm = 61.1790, GNorm = 1.0573, lr_0 = 2.7514e-04
Loss = 3.8507e-01, PNorm = 61.1833, GNorm = 1.7445, lr_0 = 2.7495e-04
Loss = 3.3620e-01, PNorm = 61.1877, GNorm = 1.6833, lr_0 = 2.7476e-04
Loss = 3.4132e-01, PNorm = 61.1950, GNorm = 0.9683, lr_0 = 2.7457e-04
Loss = 4.5222e-01, PNorm = 61.1973, GNorm = 1.7864, lr_0 = 2.7439e-04
Loss = 3.8274e-01, PNorm = 61.2014, GNorm = 1.5457, lr_0 = 2.7420e-04
Loss = 3.4845e-01, PNorm = 61.2041, GNorm = 1.0313, lr_0 = 2.7401e-04
Loss = 3.9936e-01, PNorm = 61.2032, GNorm = 1.4856, lr_0 = 2.7382e-04
Loss = 3.8402e-01, PNorm = 61.2070, GNorm = 1.3420, lr_0 = 2.7364e-04
Loss = 3.6125e-01, PNorm = 61.2095, GNorm = 0.9733, lr_0 = 2.7345e-04
Loss = 4.0483e-01, PNorm = 61.2123, GNorm = 1.5522, lr_0 = 2.7326e-04
Loss = 3.9830e-01, PNorm = 61.2137, GNorm = 1.5359, lr_0 = 2.7307e-04
Loss = 4.3800e-01, PNorm = 61.2141, GNorm = 1.5269, lr_0 = 2.7289e-04
Loss = 3.6328e-01, PNorm = 61.2201, GNorm = 1.6251, lr_0 = 2.7270e-04
Loss = 4.2116e-01, PNorm = 61.2222, GNorm = 1.4581, lr_0 = 2.7251e-04
Loss = 3.6989e-01, PNorm = 61.2230, GNorm = 1.6146, lr_0 = 2.7233e-04
Loss = 3.9698e-01, PNorm = 61.2271, GNorm = 1.5913, lr_0 = 2.7214e-04
Loss = 3.6477e-01, PNorm = 61.2312, GNorm = 1.1802, lr_0 = 2.7195e-04
Loss = 4.0219e-01, PNorm = 61.2350, GNorm = 1.5456, lr_0 = 2.7177e-04
Loss = 3.9936e-01, PNorm = 61.2379, GNorm = 1.5245, lr_0 = 2.7158e-04
Loss = 3.4647e-01, PNorm = 61.2398, GNorm = 1.5120, lr_0 = 2.7139e-04
Loss = 3.6860e-01, PNorm = 61.2436, GNorm = 1.2276, lr_0 = 2.7121e-04
Loss = 3.7445e-01, PNorm = 61.2467, GNorm = 1.5819, lr_0 = 2.7102e-04
Loss = 3.9840e-01, PNorm = 61.2539, GNorm = 1.3522, lr_0 = 2.7084e-04
Loss = 3.5324e-01, PNorm = 61.2574, GNorm = 1.6225, lr_0 = 2.7065e-04
Loss = 3.5391e-01, PNorm = 61.2622, GNorm = 1.1196, lr_0 = 2.7047e-04
Loss = 3.7650e-01, PNorm = 61.2643, GNorm = 1.3219, lr_0 = 2.7028e-04
Loss = 3.8746e-01, PNorm = 61.2664, GNorm = 1.8421, lr_0 = 2.7010e-04
Loss = 3.4739e-01, PNorm = 61.2698, GNorm = 1.4458, lr_0 = 2.6991e-04
Loss = 3.6852e-01, PNorm = 61.2737, GNorm = 2.3893, lr_0 = 2.6973e-04
Loss = 3.8765e-01, PNorm = 61.2764, GNorm = 1.4142, lr_0 = 2.6954e-04
Loss = 4.2810e-01, PNorm = 61.2807, GNorm = 1.8308, lr_0 = 2.6936e-04
Loss = 3.8514e-01, PNorm = 61.2846, GNorm = 1.6836, lr_0 = 2.6917e-04
Loss = 4.4503e-01, PNorm = 61.2882, GNorm = 2.0841, lr_0 = 2.6899e-04
Loss = 3.6342e-01, PNorm = 61.2909, GNorm = 1.1563, lr_0 = 2.6880e-04
Loss = 3.4446e-01, PNorm = 61.2975, GNorm = 1.3760, lr_0 = 2.6862e-04
Loss = 3.2704e-01, PNorm = 61.3014, GNorm = 1.7786, lr_0 = 2.6844e-04
Loss = 3.7640e-01, PNorm = 61.3036, GNorm = 1.8177, lr_0 = 2.6825e-04
Validation mae = 0.113438
Epoch 18
Loss = 3.6616e-01, PNorm = 61.3106, GNorm = 1.9551, lr_0 = 2.6807e-04
Loss = 3.6106e-01, PNorm = 61.3121, GNorm = 0.9976, lr_0 = 2.6788e-04
Loss = 4.2685e-01, PNorm = 61.3160, GNorm = 2.0220, lr_0 = 2.6770e-04
Loss = 3.5855e-01, PNorm = 61.3193, GNorm = 1.9259, lr_0 = 2.6752e-04
Loss = 3.7613e-01, PNorm = 61.3182, GNorm = 1.0261, lr_0 = 2.6733e-04
Loss = 3.8226e-01, PNorm = 61.3232, GNorm = 1.2330, lr_0 = 2.6715e-04
Loss = 4.2024e-01, PNorm = 61.3290, GNorm = 1.8440, lr_0 = 2.6697e-04
Loss = 3.8555e-01, PNorm = 61.3321, GNorm = 1.2381, lr_0 = 2.6678e-04
Loss = 3.9164e-01, PNorm = 61.3381, GNorm = 1.2378, lr_0 = 2.6660e-04
Loss = 3.2210e-01, PNorm = 61.3437, GNorm = 1.8762, lr_0 = 2.6642e-04
Loss = 4.0548e-01, PNorm = 61.3444, GNorm = 2.4002, lr_0 = 2.6624e-04
Loss = 3.4874e-01, PNorm = 61.3471, GNorm = 1.3729, lr_0 = 2.6605e-04
Loss = 4.0360e-01, PNorm = 61.3464, GNorm = 1.2104, lr_0 = 2.6587e-04
Loss = 3.9918e-01, PNorm = 61.3510, GNorm = 2.4601, lr_0 = 2.6569e-04
Loss = 4.0084e-01, PNorm = 61.3516, GNorm = 1.0858, lr_0 = 2.6551e-04
Loss = 3.6920e-01, PNorm = 61.3545, GNorm = 1.1888, lr_0 = 2.6533e-04
Loss = 3.7252e-01, PNorm = 61.3595, GNorm = 1.2808, lr_0 = 2.6514e-04
Loss = 3.3820e-01, PNorm = 61.3627, GNorm = 1.3807, lr_0 = 2.6496e-04
Loss = 3.3644e-01, PNorm = 61.3663, GNorm = 1.0333, lr_0 = 2.6478e-04
Loss = 4.0298e-01, PNorm = 61.3699, GNorm = 1.3858, lr_0 = 2.6460e-04
Loss = 3.8131e-01, PNorm = 61.3711, GNorm = 1.1874, lr_0 = 2.6442e-04
Loss = 3.9671e-01, PNorm = 61.3738, GNorm = 1.5336, lr_0 = 2.6424e-04
Loss = 3.7697e-01, PNorm = 61.3738, GNorm = 1.1910, lr_0 = 2.6406e-04
Loss = 3.3474e-01, PNorm = 61.3782, GNorm = 1.5441, lr_0 = 2.6388e-04
Loss = 3.3129e-01, PNorm = 61.3823, GNorm = 1.0633, lr_0 = 2.6369e-04
Loss = 4.1036e-01, PNorm = 61.3852, GNorm = 1.2092, lr_0 = 2.6351e-04
Loss = 3.3937e-01, PNorm = 61.3895, GNorm = 1.3232, lr_0 = 2.6333e-04
Loss = 3.8683e-01, PNorm = 61.3937, GNorm = 1.5353, lr_0 = 2.6315e-04
Loss = 3.4888e-01, PNorm = 61.3969, GNorm = 1.0914, lr_0 = 2.6297e-04
Loss = 3.7248e-01, PNorm = 61.3974, GNorm = 1.9218, lr_0 = 2.6279e-04
Loss = 3.5217e-01, PNorm = 61.3988, GNorm = 1.3214, lr_0 = 2.6261e-04
Loss = 3.7818e-01, PNorm = 61.4036, GNorm = 1.2851, lr_0 = 2.6243e-04
Loss = 3.6157e-01, PNorm = 61.4059, GNorm = 1.2125, lr_0 = 2.6225e-04
Loss = 3.8173e-01, PNorm = 61.4076, GNorm = 1.3519, lr_0 = 2.6207e-04
Loss = 3.4188e-01, PNorm = 61.4116, GNorm = 1.2260, lr_0 = 2.6189e-04
Loss = 4.0575e-01, PNorm = 61.4162, GNorm = 1.2501, lr_0 = 2.6171e-04
Loss = 4.3918e-01, PNorm = 61.4205, GNorm = 1.3118, lr_0 = 2.6153e-04
Loss = 3.7725e-01, PNorm = 61.4267, GNorm = 1.9982, lr_0 = 2.6136e-04
Loss = 3.1751e-01, PNorm = 61.4303, GNorm = 1.4683, lr_0 = 2.6118e-04
Loss = 3.7200e-01, PNorm = 61.4328, GNorm = 2.0017, lr_0 = 2.6100e-04
Loss = 3.9207e-01, PNorm = 61.4371, GNorm = 1.2726, lr_0 = 2.6082e-04
Loss = 3.5008e-01, PNorm = 61.4422, GNorm = 1.2206, lr_0 = 2.6064e-04
Loss = 3.7411e-01, PNorm = 61.4456, GNorm = 1.7549, lr_0 = 2.6046e-04
Loss = 3.8109e-01, PNorm = 61.4486, GNorm = 1.5580, lr_0 = 2.6028e-04
Loss = 3.8646e-01, PNorm = 61.4480, GNorm = 1.3463, lr_0 = 2.6011e-04
Loss = 3.6319e-01, PNorm = 61.4496, GNorm = 1.2461, lr_0 = 2.5993e-04
Loss = 3.7787e-01, PNorm = 61.4534, GNorm = 1.6118, lr_0 = 2.5975e-04
Loss = 3.5589e-01, PNorm = 61.4569, GNorm = 1.0710, lr_0 = 2.5957e-04
Loss = 3.8601e-01, PNorm = 61.4607, GNorm = 1.4284, lr_0 = 2.5939e-04
Loss = 3.4351e-01, PNorm = 61.4619, GNorm = 1.5018, lr_0 = 2.5922e-04
Loss = 3.4205e-01, PNorm = 61.4623, GNorm = 1.4632, lr_0 = 2.5904e-04
Loss = 3.4999e-01, PNorm = 61.4679, GNorm = 1.2314, lr_0 = 2.5886e-04
Loss = 3.5196e-01, PNorm = 61.4730, GNorm = 1.2291, lr_0 = 2.5868e-04
Loss = 4.1170e-01, PNorm = 61.4719, GNorm = 1.1179, lr_0 = 2.5851e-04
Loss = 3.3338e-01, PNorm = 61.4763, GNorm = 1.4447, lr_0 = 2.5833e-04
Loss = 4.0572e-01, PNorm = 61.4775, GNorm = 1.3848, lr_0 = 2.5815e-04
Loss = 3.3738e-01, PNorm = 61.4799, GNorm = 1.3323, lr_0 = 2.5797e-04
Loss = 4.5757e-01, PNorm = 61.4853, GNorm = 1.6486, lr_0 = 2.5780e-04
Loss = 3.8824e-01, PNorm = 61.4917, GNorm = 2.2307, lr_0 = 2.5762e-04
Loss = 3.4851e-01, PNorm = 61.4937, GNorm = 0.9449, lr_0 = 2.5745e-04
Loss = 4.6272e-01, PNorm = 61.5010, GNorm = 1.8252, lr_0 = 2.5727e-04
Loss = 4.0189e-01, PNorm = 61.5035, GNorm = 1.6075, lr_0 = 2.5709e-04
Loss = 3.4818e-01, PNorm = 61.5065, GNorm = 1.3046, lr_0 = 2.5692e-04
Loss = 4.3841e-01, PNorm = 61.5099, GNorm = 1.5257, lr_0 = 2.5674e-04
Loss = 3.3761e-01, PNorm = 61.5121, GNorm = 1.2521, lr_0 = 2.5656e-04
Loss = 3.5872e-01, PNorm = 61.5169, GNorm = 1.1903, lr_0 = 2.5639e-04
Loss = 3.4181e-01, PNorm = 61.5244, GNorm = 1.2983, lr_0 = 2.5621e-04
Loss = 3.8817e-01, PNorm = 61.5265, GNorm = 1.4704, lr_0 = 2.5604e-04
Loss = 4.2584e-01, PNorm = 61.5291, GNorm = 1.5796, lr_0 = 2.5586e-04
Loss = 3.3092e-01, PNorm = 61.5339, GNorm = 1.5135, lr_0 = 2.5569e-04
Loss = 3.8787e-01, PNorm = 61.5379, GNorm = 1.3645, lr_0 = 2.5551e-04
Loss = 4.6735e-01, PNorm = 61.5437, GNorm = 2.3120, lr_0 = 2.5534e-04
Loss = 3.1528e-01, PNorm = 61.5468, GNorm = 1.4549, lr_0 = 2.5516e-04
Loss = 3.3294e-01, PNorm = 61.5491, GNorm = 1.1113, lr_0 = 2.5499e-04
Loss = 3.5162e-01, PNorm = 61.5502, GNorm = 1.5108, lr_0 = 2.5481e-04
Loss = 3.6436e-01, PNorm = 61.5511, GNorm = 1.9885, lr_0 = 2.5464e-04
Loss = 3.4192e-01, PNorm = 61.5517, GNorm = 1.2504, lr_0 = 2.5446e-04
Loss = 3.5697e-01, PNorm = 61.5573, GNorm = 1.4264, lr_0 = 2.5429e-04
Loss = 3.8839e-01, PNorm = 61.5593, GNorm = 0.9651, lr_0 = 2.5411e-04
Loss = 3.5622e-01, PNorm = 61.5599, GNorm = 1.4813, lr_0 = 2.5394e-04
Loss = 3.9864e-01, PNorm = 61.5632, GNorm = 1.6201, lr_0 = 2.5377e-04
Loss = 3.8165e-01, PNorm = 61.5694, GNorm = 1.3567, lr_0 = 2.5359e-04
Loss = 4.2677e-01, PNorm = 61.5725, GNorm = 1.0889, lr_0 = 2.5342e-04
Loss = 4.0250e-01, PNorm = 61.5715, GNorm = 1.3911, lr_0 = 2.5325e-04
Loss = 3.1772e-01, PNorm = 61.5732, GNorm = 1.3455, lr_0 = 2.5307e-04
Loss = 3.6173e-01, PNorm = 61.5792, GNorm = 1.3654, lr_0 = 2.5290e-04
Loss = 3.9797e-01, PNorm = 61.5789, GNorm = 1.5730, lr_0 = 2.5273e-04
Loss = 3.9520e-01, PNorm = 61.5825, GNorm = 1.3468, lr_0 = 2.5255e-04
Loss = 3.4081e-01, PNorm = 61.5857, GNorm = 1.2435, lr_0 = 2.5238e-04
Loss = 4.0278e-01, PNorm = 61.5886, GNorm = 1.4786, lr_0 = 2.5221e-04
Loss = 3.6812e-01, PNorm = 61.5936, GNorm = 1.4248, lr_0 = 2.5203e-04
Loss = 3.8854e-01, PNorm = 61.5968, GNorm = 2.0401, lr_0 = 2.5186e-04
Loss = 3.6467e-01, PNorm = 61.6001, GNorm = 1.4018, lr_0 = 2.5169e-04
Loss = 3.3961e-01, PNorm = 61.6017, GNorm = 1.2495, lr_0 = 2.5152e-04
Loss = 3.6841e-01, PNorm = 61.6038, GNorm = 1.2909, lr_0 = 2.5134e-04
Loss = 3.6336e-01, PNorm = 61.6056, GNorm = 0.8883, lr_0 = 2.5117e-04
Loss = 3.5004e-01, PNorm = 61.6105, GNorm = 1.2912, lr_0 = 2.5100e-04
Loss = 3.4030e-01, PNorm = 61.6112, GNorm = 1.5230, lr_0 = 2.5083e-04
Loss = 3.4595e-01, PNorm = 61.6142, GNorm = 1.0229, lr_0 = 2.5066e-04
Loss = 3.5674e-01, PNorm = 61.6168, GNorm = 1.2529, lr_0 = 2.5048e-04
Loss = 4.3305e-01, PNorm = 61.6192, GNorm = 2.4333, lr_0 = 2.5031e-04
Loss = 3.5323e-01, PNorm = 61.6203, GNorm = 1.5353, lr_0 = 2.5014e-04
Loss = 3.7899e-01, PNorm = 61.6239, GNorm = 0.9937, lr_0 = 2.4997e-04
Loss = 3.7853e-01, PNorm = 61.6250, GNorm = 1.0256, lr_0 = 2.4980e-04
Loss = 3.4409e-01, PNorm = 61.6275, GNorm = 1.1447, lr_0 = 2.4963e-04
Loss = 3.6262e-01, PNorm = 61.6315, GNorm = 1.4057, lr_0 = 2.4946e-04
Loss = 3.6113e-01, PNorm = 61.6344, GNorm = 1.1923, lr_0 = 2.4929e-04
Loss = 3.3142e-01, PNorm = 61.6394, GNorm = 1.4998, lr_0 = 2.4911e-04
Loss = 3.7172e-01, PNorm = 61.6450, GNorm = 1.6417, lr_0 = 2.4894e-04
Loss = 3.3511e-01, PNorm = 61.6438, GNorm = 1.2241, lr_0 = 2.4877e-04
Loss = 3.7739e-01, PNorm = 61.6462, GNorm = 1.9513, lr_0 = 2.4860e-04
Loss = 3.6332e-01, PNorm = 61.6482, GNorm = 1.0263, lr_0 = 2.4843e-04
Loss = 4.1213e-01, PNorm = 61.6519, GNorm = 1.5002, lr_0 = 2.4826e-04
Loss = 4.0169e-01, PNorm = 61.6543, GNorm = 1.1828, lr_0 = 2.4809e-04
Loss = 3.7412e-01, PNorm = 61.6566, GNorm = 1.2467, lr_0 = 2.4792e-04
Loss = 3.4497e-01, PNorm = 61.6582, GNorm = 1.4056, lr_0 = 2.4775e-04
Loss = 3.2880e-01, PNorm = 61.6583, GNorm = 1.2466, lr_0 = 2.4758e-04
Loss = 3.4048e-01, PNorm = 61.6629, GNorm = 1.8530, lr_0 = 2.4741e-04
Loss = 3.5586e-01, PNorm = 61.6691, GNorm = 1.3056, lr_0 = 2.4724e-04
Loss = 3.6716e-01, PNorm = 61.6682, GNorm = 1.1967, lr_0 = 2.4707e-04
Validation mae = 0.112208
Epoch 19
Loss = 4.2416e-01, PNorm = 61.6705, GNorm = 1.9026, lr_0 = 2.4690e-04
Loss = 3.5395e-01, PNorm = 61.6781, GNorm = 1.2919, lr_0 = 2.4674e-04
Loss = 3.7402e-01, PNorm = 61.6811, GNorm = 1.7715, lr_0 = 2.4657e-04
Loss = 3.8422e-01, PNorm = 61.6861, GNorm = 1.4970, lr_0 = 2.4640e-04
Loss = 3.6199e-01, PNorm = 61.6854, GNorm = 1.5379, lr_0 = 2.4623e-04
Loss = 3.7770e-01, PNorm = 61.6871, GNorm = 1.5865, lr_0 = 2.4606e-04
Loss = 3.8159e-01, PNorm = 61.6918, GNorm = 1.1909, lr_0 = 2.4589e-04
Loss = 3.6186e-01, PNorm = 61.6923, GNorm = 1.4659, lr_0 = 2.4572e-04
Loss = 3.6898e-01, PNorm = 61.6961, GNorm = 1.2863, lr_0 = 2.4556e-04
Loss = 4.2064e-01, PNorm = 61.6986, GNorm = 1.9120, lr_0 = 2.4539e-04
Loss = 4.6018e-01, PNorm = 61.6960, GNorm = 4.8119, lr_0 = 2.4522e-04
Loss = 3.7341e-01, PNorm = 61.6984, GNorm = 1.3943, lr_0 = 2.4505e-04
Loss = 3.7717e-01, PNorm = 61.7011, GNorm = 1.7095, lr_0 = 2.4488e-04
Loss = 3.4065e-01, PNorm = 61.7015, GNorm = 0.8753, lr_0 = 2.4472e-04
Loss = 3.5756e-01, PNorm = 61.7031, GNorm = 1.6659, lr_0 = 2.4455e-04
Loss = 3.7719e-01, PNorm = 61.7084, GNorm = 1.5115, lr_0 = 2.4438e-04
Loss = 3.5941e-01, PNorm = 61.7138, GNorm = 0.9549, lr_0 = 2.4421e-04
Loss = 4.1012e-01, PNorm = 61.7147, GNorm = 0.9590, lr_0 = 2.4405e-04
Loss = 3.7765e-01, PNorm = 61.7191, GNorm = 1.2801, lr_0 = 2.4388e-04
Loss = 3.2818e-01, PNorm = 61.7239, GNorm = 1.3735, lr_0 = 2.4371e-04
Loss = 3.4806e-01, PNorm = 61.7260, GNorm = 1.6229, lr_0 = 2.4354e-04
Loss = 3.2295e-01, PNorm = 61.7290, GNorm = 1.2098, lr_0 = 2.4338e-04
Loss = 3.8528e-01, PNorm = 61.7301, GNorm = 1.7144, lr_0 = 2.4321e-04
Loss = 3.4822e-01, PNorm = 61.7357, GNorm = 1.9219, lr_0 = 2.4304e-04
Loss = 3.6366e-01, PNorm = 61.7393, GNorm = 1.2686, lr_0 = 2.4288e-04
Loss = 3.9421e-01, PNorm = 61.7394, GNorm = 1.3967, lr_0 = 2.4271e-04
Loss = 3.7546e-01, PNorm = 61.7427, GNorm = 2.1330, lr_0 = 2.4254e-04
Loss = 3.8565e-01, PNorm = 61.7462, GNorm = 1.3774, lr_0 = 2.4238e-04
Loss = 4.0409e-01, PNorm = 61.7505, GNorm = 1.4205, lr_0 = 2.4221e-04
Loss = 3.3926e-01, PNorm = 61.7523, GNorm = 1.4380, lr_0 = 2.4205e-04
Loss = 3.3949e-01, PNorm = 61.7550, GNorm = 1.2145, lr_0 = 2.4188e-04
Loss = 4.0645e-01, PNorm = 61.7531, GNorm = 1.3320, lr_0 = 2.4171e-04
Loss = 4.1828e-01, PNorm = 61.7582, GNorm = 1.0186, lr_0 = 2.4155e-04
Loss = 3.5094e-01, PNorm = 61.7624, GNorm = 1.3378, lr_0 = 2.4138e-04
Loss = 3.4362e-01, PNorm = 61.7669, GNorm = 1.6784, lr_0 = 2.4122e-04
Loss = 3.5059e-01, PNorm = 61.7695, GNorm = 1.3802, lr_0 = 2.4105e-04
Loss = 3.7245e-01, PNorm = 61.7699, GNorm = 1.6134, lr_0 = 2.4089e-04
Loss = 3.7788e-01, PNorm = 61.7702, GNorm = 1.2925, lr_0 = 2.4072e-04
Loss = 3.3626e-01, PNorm = 61.7750, GNorm = 1.1263, lr_0 = 2.4056e-04
Loss = 3.1569e-01, PNorm = 61.7786, GNorm = 1.2243, lr_0 = 2.4039e-04
Loss = 3.9428e-01, PNorm = 61.7797, GNorm = 1.1995, lr_0 = 2.4023e-04
Loss = 3.1959e-01, PNorm = 61.7816, GNorm = 1.0897, lr_0 = 2.4006e-04
Loss = 3.9636e-01, PNorm = 61.7841, GNorm = 1.1878, lr_0 = 2.3990e-04
Loss = 4.0112e-01, PNorm = 61.7881, GNorm = 1.3216, lr_0 = 2.3974e-04
Loss = 3.5088e-01, PNorm = 61.7915, GNorm = 2.0065, lr_0 = 2.3957e-04
Loss = 3.4405e-01, PNorm = 61.7946, GNorm = 1.6984, lr_0 = 2.3941e-04
Loss = 3.3101e-01, PNorm = 61.7953, GNorm = 1.1467, lr_0 = 2.3924e-04
Loss = 3.6151e-01, PNorm = 61.7983, GNorm = 1.1450, lr_0 = 2.3908e-04
Loss = 3.7481e-01, PNorm = 61.8020, GNorm = 1.4590, lr_0 = 2.3892e-04
Loss = 4.1136e-01, PNorm = 61.8037, GNorm = 1.3523, lr_0 = 2.3875e-04
Loss = 3.9709e-01, PNorm = 61.8058, GNorm = 1.4253, lr_0 = 2.3859e-04
Loss = 3.6290e-01, PNorm = 61.8083, GNorm = 1.3864, lr_0 = 2.3842e-04
Loss = 3.6243e-01, PNorm = 61.8110, GNorm = 1.6670, lr_0 = 2.3826e-04
Loss = 3.4666e-01, PNorm = 61.8135, GNorm = 1.3238, lr_0 = 2.3810e-04
Loss = 3.6731e-01, PNorm = 61.8161, GNorm = 1.2231, lr_0 = 2.3794e-04
Loss = 3.9288e-01, PNorm = 61.8181, GNorm = 1.2195, lr_0 = 2.3777e-04
Loss = 3.4932e-01, PNorm = 61.8193, GNorm = 1.1409, lr_0 = 2.3761e-04
Loss = 3.5560e-01, PNorm = 61.8202, GNorm = 1.3285, lr_0 = 2.3745e-04
Loss = 3.3970e-01, PNorm = 61.8227, GNorm = 1.2779, lr_0 = 2.3728e-04
Loss = 3.6456e-01, PNorm = 61.8262, GNorm = 1.2471, lr_0 = 2.3712e-04
Loss = 3.6347e-01, PNorm = 61.8281, GNorm = 1.1894, lr_0 = 2.3696e-04
Loss = 3.8969e-01, PNorm = 61.8320, GNorm = 1.6179, lr_0 = 2.3680e-04
Loss = 3.6263e-01, PNorm = 61.8329, GNorm = 1.4907, lr_0 = 2.3663e-04
Loss = 3.9659e-01, PNorm = 61.8357, GNorm = 1.2509, lr_0 = 2.3647e-04
Loss = 4.1115e-01, PNorm = 61.8396, GNorm = 1.4926, lr_0 = 2.3631e-04
Loss = 3.5634e-01, PNorm = 61.8418, GNorm = 1.2527, lr_0 = 2.3615e-04
Loss = 3.5629e-01, PNorm = 61.8433, GNorm = 1.1718, lr_0 = 2.3599e-04
Loss = 3.6264e-01, PNorm = 61.8441, GNorm = 1.6071, lr_0 = 2.3582e-04
Loss = 3.4158e-01, PNorm = 61.8492, GNorm = 1.1017, lr_0 = 2.3566e-04
Loss = 3.7182e-01, PNorm = 61.8483, GNorm = 1.5015, lr_0 = 2.3550e-04
Loss = 4.1182e-01, PNorm = 61.8480, GNorm = 1.1694, lr_0 = 2.3534e-04
Loss = 3.8153e-01, PNorm = 61.8504, GNorm = 1.5309, lr_0 = 2.3518e-04
Loss = 3.8902e-01, PNorm = 61.8532, GNorm = 0.9646, lr_0 = 2.3502e-04
Loss = 3.8163e-01, PNorm = 61.8586, GNorm = 1.2346, lr_0 = 2.3486e-04
Loss = 3.5516e-01, PNorm = 61.8630, GNorm = 1.4894, lr_0 = 2.3470e-04
Loss = 3.8419e-01, PNorm = 61.8652, GNorm = 1.1724, lr_0 = 2.3454e-04
Loss = 4.0327e-01, PNorm = 61.8687, GNorm = 1.9835, lr_0 = 2.3437e-04
Loss = 3.2063e-01, PNorm = 61.8699, GNorm = 1.1176, lr_0 = 2.3421e-04
Loss = 3.8844e-01, PNorm = 61.8720, GNorm = 1.1364, lr_0 = 2.3405e-04
Loss = 3.5341e-01, PNorm = 61.8712, GNorm = 1.4994, lr_0 = 2.3389e-04
Loss = 3.7560e-01, PNorm = 61.8736, GNorm = 1.0300, lr_0 = 2.3373e-04
Loss = 3.1920e-01, PNorm = 61.8750, GNorm = 0.9813, lr_0 = 2.3357e-04
Loss = 4.0838e-01, PNorm = 61.8762, GNorm = 2.8594, lr_0 = 2.3341e-04
Loss = 3.4918e-01, PNorm = 61.8799, GNorm = 1.8284, lr_0 = 2.3325e-04
Loss = 3.5062e-01, PNorm = 61.8831, GNorm = 1.1337, lr_0 = 2.3309e-04
Loss = 3.9418e-01, PNorm = 61.8847, GNorm = 1.8001, lr_0 = 2.3293e-04
Loss = 3.7737e-01, PNorm = 61.8851, GNorm = 1.7101, lr_0 = 2.3277e-04
Loss = 3.2729e-01, PNorm = 61.8869, GNorm = 1.3991, lr_0 = 2.3261e-04
Loss = 3.4187e-01, PNorm = 61.8881, GNorm = 1.1049, lr_0 = 2.3246e-04
Loss = 4.3537e-01, PNorm = 61.8907, GNorm = 1.1686, lr_0 = 2.3230e-04
Loss = 3.7368e-01, PNorm = 61.8953, GNorm = 1.0402, lr_0 = 2.3214e-04
Loss = 3.6046e-01, PNorm = 61.8986, GNorm = 0.9823, lr_0 = 2.3198e-04
Loss = 3.4489e-01, PNorm = 61.9040, GNorm = 1.1849, lr_0 = 2.3182e-04
Loss = 3.8177e-01, PNorm = 61.9034, GNorm = 1.6900, lr_0 = 2.3166e-04
Loss = 3.7911e-01, PNorm = 61.9064, GNorm = 1.3875, lr_0 = 2.3150e-04
Loss = 3.5647e-01, PNorm = 61.9115, GNorm = 1.2741, lr_0 = 2.3134e-04
Loss = 3.6464e-01, PNorm = 61.9131, GNorm = 1.5690, lr_0 = 2.3118e-04
Loss = 3.3625e-01, PNorm = 61.9204, GNorm = 0.8419, lr_0 = 2.3103e-04
Loss = 3.6219e-01, PNorm = 61.9218, GNorm = 2.1072, lr_0 = 2.3087e-04
Loss = 3.4932e-01, PNorm = 61.9234, GNorm = 1.0830, lr_0 = 2.3071e-04
Loss = 3.7176e-01, PNorm = 61.9262, GNorm = 1.5765, lr_0 = 2.3055e-04
Loss = 3.4504e-01, PNorm = 61.9278, GNorm = 1.2441, lr_0 = 2.3039e-04
Loss = 3.7795e-01, PNorm = 61.9283, GNorm = 1.5054, lr_0 = 2.3024e-04
Loss = 3.4346e-01, PNorm = 61.9326, GNorm = 1.1333, lr_0 = 2.3008e-04
Loss = 3.7052e-01, PNorm = 61.9334, GNorm = 1.6624, lr_0 = 2.2992e-04
Loss = 4.1654e-01, PNorm = 61.9341, GNorm = 1.5740, lr_0 = 2.2976e-04
Loss = 3.8062e-01, PNorm = 61.9351, GNorm = 1.7543, lr_0 = 2.2961e-04
Loss = 4.0971e-01, PNorm = 61.9352, GNorm = 1.4116, lr_0 = 2.2945e-04
Loss = 3.7347e-01, PNorm = 61.9392, GNorm = 1.5276, lr_0 = 2.2929e-04
Loss = 3.6892e-01, PNorm = 61.9400, GNorm = 0.9504, lr_0 = 2.2913e-04
Loss = 3.7716e-01, PNorm = 61.9426, GNorm = 1.1089, lr_0 = 2.2898e-04
Loss = 3.6329e-01, PNorm = 61.9413, GNorm = 1.4954, lr_0 = 2.2882e-04
Loss = 3.7964e-01, PNorm = 61.9453, GNorm = 1.3451, lr_0 = 2.2866e-04
Loss = 3.6876e-01, PNorm = 61.9489, GNorm = 1.5763, lr_0 = 2.2851e-04
Loss = 3.4821e-01, PNorm = 61.9522, GNorm = 1.7663, lr_0 = 2.2835e-04
Loss = 3.9822e-01, PNorm = 61.9568, GNorm = 1.0280, lr_0 = 2.2819e-04
Loss = 3.1360e-01, PNorm = 61.9591, GNorm = 1.4789, lr_0 = 2.2804e-04
Loss = 3.7323e-01, PNorm = 61.9624, GNorm = 0.9772, lr_0 = 2.2788e-04
Loss = 3.7715e-01, PNorm = 61.9660, GNorm = 1.6962, lr_0 = 2.2773e-04
Loss = 3.5016e-01, PNorm = 61.9676, GNorm = 1.6357, lr_0 = 2.2757e-04
Validation mae = 0.112129
Epoch 20
Loss = 3.4697e-01, PNorm = 61.9683, GNorm = 1.5861, lr_0 = 2.2741e-04
Loss = 3.4937e-01, PNorm = 61.9728, GNorm = 1.6692, lr_0 = 2.2726e-04
Loss = 3.5442e-01, PNorm = 61.9761, GNorm = 1.3671, lr_0 = 2.2710e-04
Loss = 3.5370e-01, PNorm = 61.9768, GNorm = 0.9030, lr_0 = 2.2695e-04
Loss = 3.5177e-01, PNorm = 61.9793, GNorm = 1.4130, lr_0 = 2.2679e-04
Loss = 3.8026e-01, PNorm = 61.9787, GNorm = 1.9164, lr_0 = 2.2664e-04
Loss = 3.6078e-01, PNorm = 61.9813, GNorm = 1.1505, lr_0 = 2.2648e-04
Loss = 4.3038e-01, PNorm = 61.9818, GNorm = 1.7162, lr_0 = 2.2632e-04
Loss = 3.8920e-01, PNorm = 61.9859, GNorm = 1.3604, lr_0 = 2.2617e-04
Loss = 3.6061e-01, PNorm = 61.9883, GNorm = 1.9774, lr_0 = 2.2601e-04
Loss = 3.2025e-01, PNorm = 61.9878, GNorm = 1.4109, lr_0 = 2.2586e-04
Loss = 3.6678e-01, PNorm = 61.9906, GNorm = 1.3335, lr_0 = 2.2571e-04
Loss = 3.8120e-01, PNorm = 61.9910, GNorm = 1.2426, lr_0 = 2.2555e-04
Loss = 3.2516e-01, PNorm = 61.9947, GNorm = 1.2447, lr_0 = 2.2540e-04
Loss = 3.6828e-01, PNorm = 61.9968, GNorm = 1.5493, lr_0 = 2.2524e-04
Loss = 3.4233e-01, PNorm = 62.0013, GNorm = 1.0170, lr_0 = 2.2509e-04
Loss = 3.4686e-01, PNorm = 62.0036, GNorm = 1.4923, lr_0 = 2.2493e-04
Loss = 3.0347e-01, PNorm = 62.0070, GNorm = 1.7746, lr_0 = 2.2478e-04
Loss = 3.3018e-01, PNorm = 62.0088, GNorm = 1.2543, lr_0 = 2.2463e-04
Loss = 3.2487e-01, PNorm = 62.0112, GNorm = 1.1931, lr_0 = 2.2447e-04
Loss = 3.2970e-01, PNorm = 62.0185, GNorm = 1.5244, lr_0 = 2.2432e-04
Loss = 3.7473e-01, PNorm = 62.0207, GNorm = 1.2666, lr_0 = 2.2416e-04
Loss = 3.5674e-01, PNorm = 62.0256, GNorm = 1.2855, lr_0 = 2.2401e-04
Loss = 2.9340e-01, PNorm = 62.0292, GNorm = 1.1849, lr_0 = 2.2386e-04
Loss = 4.1530e-01, PNorm = 62.0310, GNorm = 1.4402, lr_0 = 2.2370e-04
Loss = 3.5018e-01, PNorm = 62.0347, GNorm = 1.2038, lr_0 = 2.2355e-04
Loss = 3.1676e-01, PNorm = 62.0374, GNorm = 1.4632, lr_0 = 2.2340e-04
Loss = 3.6676e-01, PNorm = 62.0398, GNorm = 1.5001, lr_0 = 2.2324e-04
Loss = 3.5266e-01, PNorm = 62.0438, GNorm = 1.1980, lr_0 = 2.2309e-04
Loss = 4.4676e-01, PNorm = 62.0462, GNorm = 1.7437, lr_0 = 2.2294e-04
Loss = 4.1794e-01, PNorm = 62.0480, GNorm = 1.2151, lr_0 = 2.2279e-04
Loss = 4.2195e-01, PNorm = 62.0515, GNorm = 1.4409, lr_0 = 2.2263e-04
Loss = 3.4560e-01, PNorm = 62.0534, GNorm = 1.5186, lr_0 = 2.2248e-04
Loss = 3.9241e-01, PNorm = 62.0561, GNorm = 1.7451, lr_0 = 2.2233e-04
Loss = 3.4931e-01, PNorm = 62.0617, GNorm = 1.1243, lr_0 = 2.2218e-04
Loss = 3.4643e-01, PNorm = 62.0646, GNorm = 1.5609, lr_0 = 2.2202e-04
Loss = 3.6667e-01, PNorm = 62.0659, GNorm = 0.9837, lr_0 = 2.2187e-04
Loss = 3.6618e-01, PNorm = 62.0691, GNorm = 1.0389, lr_0 = 2.2172e-04
Loss = 3.7237e-01, PNorm = 62.0718, GNorm = 2.0191, lr_0 = 2.2157e-04
Loss = 3.7173e-01, PNorm = 62.0704, GNorm = 1.5918, lr_0 = 2.2142e-04
Loss = 3.9343e-01, PNorm = 62.0746, GNorm = 1.8824, lr_0 = 2.2126e-04
Loss = 3.7567e-01, PNorm = 62.0807, GNorm = 1.8805, lr_0 = 2.2111e-04
Loss = 3.6200e-01, PNorm = 62.0855, GNorm = 2.0079, lr_0 = 2.2096e-04
Loss = 3.1883e-01, PNorm = 62.0889, GNorm = 1.2780, lr_0 = 2.2081e-04
Loss = 3.4301e-01, PNorm = 62.0932, GNorm = 1.3422, lr_0 = 2.2066e-04
Loss = 3.2580e-01, PNorm = 62.0957, GNorm = 1.0782, lr_0 = 2.2051e-04
Loss = 3.3199e-01, PNorm = 62.0982, GNorm = 1.9863, lr_0 = 2.2036e-04
Loss = 3.3924e-01, PNorm = 62.1013, GNorm = 1.4846, lr_0 = 2.2021e-04
Loss = 3.6066e-01, PNorm = 62.1022, GNorm = 1.4649, lr_0 = 2.2005e-04
Loss = 3.9714e-01, PNorm = 62.1029, GNorm = 1.5919, lr_0 = 2.1990e-04
Loss = 4.0194e-01, PNorm = 62.1089, GNorm = 1.6819, lr_0 = 2.1975e-04
Loss = 3.6424e-01, PNorm = 62.1094, GNorm = 1.3116, lr_0 = 2.1960e-04
Loss = 3.6328e-01, PNorm = 62.1129, GNorm = 2.0854, lr_0 = 2.1945e-04
Loss = 3.7310e-01, PNorm = 62.1182, GNorm = 1.0431, lr_0 = 2.1930e-04
Loss = 4.0764e-01, PNorm = 62.1189, GNorm = 1.1892, lr_0 = 2.1915e-04
Loss = 3.4693e-01, PNorm = 62.1240, GNorm = 1.6955, lr_0 = 2.1900e-04
Loss = 4.1380e-01, PNorm = 62.1279, GNorm = 1.3854, lr_0 = 2.1885e-04
Loss = 3.3161e-01, PNorm = 62.1334, GNorm = 1.3226, lr_0 = 2.1870e-04
Loss = 3.9010e-01, PNorm = 62.1352, GNorm = 1.5292, lr_0 = 2.1855e-04
Loss = 3.9916e-01, PNorm = 62.1364, GNorm = 1.8411, lr_0 = 2.1840e-04
Loss = 3.4626e-01, PNorm = 62.1410, GNorm = 1.1693, lr_0 = 2.1825e-04
Loss = 4.0443e-01, PNorm = 62.1449, GNorm = 2.1714, lr_0 = 2.1810e-04
Loss = 3.6945e-01, PNorm = 62.1455, GNorm = 1.3228, lr_0 = 2.1795e-04
Loss = 3.9539e-01, PNorm = 62.1473, GNorm = 1.3851, lr_0 = 2.1780e-04
Loss = 3.5945e-01, PNorm = 62.1510, GNorm = 1.3195, lr_0 = 2.1765e-04
Loss = 3.6045e-01, PNorm = 62.1548, GNorm = 1.6644, lr_0 = 2.1751e-04
Loss = 3.6974e-01, PNorm = 62.1570, GNorm = 1.5702, lr_0 = 2.1736e-04
Loss = 3.7097e-01, PNorm = 62.1568, GNorm = 1.1111, lr_0 = 2.1721e-04
Loss = 3.5336e-01, PNorm = 62.1575, GNorm = 1.3365, lr_0 = 2.1706e-04
Loss = 3.4227e-01, PNorm = 62.1611, GNorm = 1.2759, lr_0 = 2.1691e-04
Loss = 3.3978e-01, PNorm = 62.1634, GNorm = 1.4577, lr_0 = 2.1676e-04
Loss = 3.7738e-01, PNorm = 62.1648, GNorm = 1.1369, lr_0 = 2.1661e-04
Loss = 3.2966e-01, PNorm = 62.1689, GNorm = 1.5673, lr_0 = 2.1646e-04
Loss = 3.6694e-01, PNorm = 62.1717, GNorm = 1.8761, lr_0 = 2.1632e-04
Loss = 3.5567e-01, PNorm = 62.1740, GNorm = 1.0261, lr_0 = 2.1617e-04
Loss = 3.8225e-01, PNorm = 62.1756, GNorm = 1.7632, lr_0 = 2.1602e-04
Loss = 3.4197e-01, PNorm = 62.1780, GNorm = 1.6019, lr_0 = 2.1587e-04
Loss = 3.7146e-01, PNorm = 62.1792, GNorm = 2.0533, lr_0 = 2.1572e-04
Loss = 3.5316e-01, PNorm = 62.1807, GNorm = 1.8709, lr_0 = 2.1558e-04
Loss = 4.1090e-01, PNorm = 62.1816, GNorm = 2.7866, lr_0 = 2.1543e-04
Loss = 3.1501e-01, PNorm = 62.1820, GNorm = 1.1514, lr_0 = 2.1528e-04
Loss = 4.3608e-01, PNorm = 62.1808, GNorm = 1.5788, lr_0 = 2.1513e-04
Loss = 3.7773e-01, PNorm = 62.1844, GNorm = 1.2365, lr_0 = 2.1499e-04
Loss = 3.7543e-01, PNorm = 62.1891, GNorm = 1.2139, lr_0 = 2.1484e-04
Loss = 3.5644e-01, PNorm = 62.1882, GNorm = 2.0323, lr_0 = 2.1469e-04
Loss = 3.8264e-01, PNorm = 62.1871, GNorm = 1.9751, lr_0 = 2.1454e-04
Loss = 3.8872e-01, PNorm = 62.1939, GNorm = 1.7595, lr_0 = 2.1440e-04
Loss = 3.3235e-01, PNorm = 62.1997, GNorm = 1.5541, lr_0 = 2.1425e-04
Loss = 3.2135e-01, PNorm = 62.1997, GNorm = 0.8374, lr_0 = 2.1410e-04
Loss = 3.7513e-01, PNorm = 62.2025, GNorm = 1.1353, lr_0 = 2.1396e-04
Loss = 3.3718e-01, PNorm = 62.2053, GNorm = 1.5025, lr_0 = 2.1381e-04
Loss = 4.0810e-01, PNorm = 62.2077, GNorm = 1.0634, lr_0 = 2.1366e-04
Loss = 3.7732e-01, PNorm = 62.2114, GNorm = 1.1308, lr_0 = 2.1352e-04
Loss = 3.9729e-01, PNorm = 62.2145, GNorm = 1.2758, lr_0 = 2.1337e-04
Loss = 3.6475e-01, PNorm = 62.2162, GNorm = 1.8200, lr_0 = 2.1323e-04
Loss = 3.4469e-01, PNorm = 62.2188, GNorm = 1.2497, lr_0 = 2.1308e-04
Loss = 4.5173e-01, PNorm = 62.2176, GNorm = 1.5908, lr_0 = 2.1293e-04
Loss = 4.4176e-01, PNorm = 62.2196, GNorm = 1.5615, lr_0 = 2.1279e-04
Loss = 3.2920e-01, PNorm = 62.2239, GNorm = 1.3402, lr_0 = 2.1264e-04
Loss = 4.0359e-01, PNorm = 62.2254, GNorm = 0.9120, lr_0 = 2.1250e-04
Loss = 3.4953e-01, PNorm = 62.2255, GNorm = 1.7238, lr_0 = 2.1235e-04
Loss = 3.6168e-01, PNorm = 62.2287, GNorm = 1.9502, lr_0 = 2.1221e-04
Loss = 3.2385e-01, PNorm = 62.2298, GNorm = 1.0445, lr_0 = 2.1206e-04
Loss = 3.6078e-01, PNorm = 62.2309, GNorm = 1.2089, lr_0 = 2.1191e-04
Loss = 4.1543e-01, PNorm = 62.2357, GNorm = 1.4578, lr_0 = 2.1177e-04
Loss = 4.2423e-01, PNorm = 62.2381, GNorm = 2.4528, lr_0 = 2.1162e-04
Loss = 3.1445e-01, PNorm = 62.2405, GNorm = 1.0018, lr_0 = 2.1148e-04
Loss = 3.2259e-01, PNorm = 62.2426, GNorm = 1.5865, lr_0 = 2.1133e-04
Loss = 3.6551e-01, PNorm = 62.2445, GNorm = 1.3314, lr_0 = 2.1119e-04
Loss = 3.1777e-01, PNorm = 62.2443, GNorm = 1.4282, lr_0 = 2.1104e-04
Loss = 3.6953e-01, PNorm = 62.2466, GNorm = 1.5532, lr_0 = 2.1090e-04
Loss = 3.7920e-01, PNorm = 62.2501, GNorm = 1.7061, lr_0 = 2.1076e-04
Loss = 3.2509e-01, PNorm = 62.2522, GNorm = 1.6671, lr_0 = 2.1061e-04
Loss = 3.6388e-01, PNorm = 62.2545, GNorm = 1.4561, lr_0 = 2.1047e-04
Loss = 3.4627e-01, PNorm = 62.2549, GNorm = 1.4298, lr_0 = 2.1032e-04
Loss = 3.8196e-01, PNorm = 62.2554, GNorm = 1.8880, lr_0 = 2.1018e-04
Loss = 3.7896e-01, PNorm = 62.2549, GNorm = 1.2002, lr_0 = 2.1003e-04
Loss = 3.5622e-01, PNorm = 62.2568, GNorm = 1.6590, lr_0 = 2.0989e-04
Loss = 3.7431e-01, PNorm = 62.2579, GNorm = 1.5244, lr_0 = 2.0975e-04
Loss = 3.3330e-01, PNorm = 62.2592, GNorm = 1.1093, lr_0 = 2.0960e-04
Validation mae = 0.111872
Epoch 21
Loss = 3.9788e-01, PNorm = 62.2621, GNorm = 2.1140, lr_0 = 2.0946e-04
Loss = 3.5704e-01, PNorm = 62.2687, GNorm = 1.8686, lr_0 = 2.0932e-04
Loss = 3.5695e-01, PNorm = 62.2710, GNorm = 1.4747, lr_0 = 2.0917e-04
Loss = 3.9022e-01, PNorm = 62.2738, GNorm = 1.7808, lr_0 = 2.0903e-04
Loss = 3.4249e-01, PNorm = 62.2786, GNorm = 1.2986, lr_0 = 2.0889e-04
Loss = 3.5599e-01, PNorm = 62.2806, GNorm = 1.4732, lr_0 = 2.0874e-04
Loss = 3.4599e-01, PNorm = 62.2841, GNorm = 1.3350, lr_0 = 2.0860e-04
Loss = 3.3265e-01, PNorm = 62.2860, GNorm = 1.9823, lr_0 = 2.0846e-04
Loss = 3.7577e-01, PNorm = 62.2892, GNorm = 1.5248, lr_0 = 2.0831e-04
Loss = 3.3829e-01, PNorm = 62.2898, GNorm = 1.3275, lr_0 = 2.0817e-04
Loss = 3.3509e-01, PNorm = 62.2916, GNorm = 1.3406, lr_0 = 2.0803e-04
Loss = 3.7547e-01, PNorm = 62.2946, GNorm = 1.4965, lr_0 = 2.0789e-04
Loss = 3.5403e-01, PNorm = 62.2971, GNorm = 0.9320, lr_0 = 2.0774e-04
Loss = 3.9323e-01, PNorm = 62.2970, GNorm = 1.7351, lr_0 = 2.0760e-04
Loss = 3.3368e-01, PNorm = 62.2979, GNorm = 1.5721, lr_0 = 2.0746e-04
Loss = 3.9819e-01, PNorm = 62.2969, GNorm = 1.3967, lr_0 = 2.0732e-04
Loss = 3.6700e-01, PNorm = 62.3012, GNorm = 1.4160, lr_0 = 2.0718e-04
Loss = 4.1535e-01, PNorm = 62.3027, GNorm = 1.8638, lr_0 = 2.0703e-04
Loss = 3.2875e-01, PNorm = 62.3050, GNorm = 1.3719, lr_0 = 2.0689e-04
Loss = 3.2987e-01, PNorm = 62.3073, GNorm = 1.6463, lr_0 = 2.0675e-04
Loss = 3.9656e-01, PNorm = 62.3092, GNorm = 1.5242, lr_0 = 2.0661e-04
Loss = 3.2905e-01, PNorm = 62.3103, GNorm = 1.4048, lr_0 = 2.0647e-04
Loss = 3.4045e-01, PNorm = 62.3116, GNorm = 1.3787, lr_0 = 2.0633e-04
Loss = 3.1424e-01, PNorm = 62.3151, GNorm = 1.2594, lr_0 = 2.0618e-04
Loss = 3.7457e-01, PNorm = 62.3181, GNorm = 1.1766, lr_0 = 2.0604e-04
Loss = 3.5302e-01, PNorm = 62.3187, GNorm = 1.0530, lr_0 = 2.0590e-04
Loss = 3.7920e-01, PNorm = 62.3218, GNorm = 1.3131, lr_0 = 2.0576e-04
Loss = 3.8754e-01, PNorm = 62.3228, GNorm = 1.8581, lr_0 = 2.0562e-04
Loss = 3.6546e-01, PNorm = 62.3255, GNorm = 1.2448, lr_0 = 2.0548e-04
Loss = 3.2576e-01, PNorm = 62.3276, GNorm = 1.2204, lr_0 = 2.0534e-04
Loss = 4.2412e-01, PNorm = 62.3285, GNorm = 1.5165, lr_0 = 2.0520e-04
Loss = 3.3221e-01, PNorm = 62.3314, GNorm = 1.2094, lr_0 = 2.0506e-04
Loss = 3.7602e-01, PNorm = 62.3342, GNorm = 1.6579, lr_0 = 2.0492e-04
Loss = 3.7356e-01, PNorm = 62.3404, GNorm = 1.6488, lr_0 = 2.0478e-04
Loss = 3.6936e-01, PNorm = 62.3438, GNorm = 1.7312, lr_0 = 2.0464e-04
Loss = 3.3461e-01, PNorm = 62.3476, GNorm = 1.3823, lr_0 = 2.0450e-04
Loss = 2.9832e-01, PNorm = 62.3469, GNorm = 1.2156, lr_0 = 2.0436e-04
Loss = 3.7803e-01, PNorm = 62.3460, GNorm = 1.5733, lr_0 = 2.0422e-04
Loss = 3.6210e-01, PNorm = 62.3482, GNorm = 1.6255, lr_0 = 2.0408e-04
Loss = 3.6619e-01, PNorm = 62.3514, GNorm = 1.1912, lr_0 = 2.0394e-04
Loss = 3.4485e-01, PNorm = 62.3522, GNorm = 1.3523, lr_0 = 2.0380e-04
Loss = 3.4741e-01, PNorm = 62.3523, GNorm = 1.2804, lr_0 = 2.0366e-04
Loss = 3.2513e-01, PNorm = 62.3553, GNorm = 1.4756, lr_0 = 2.0352e-04
Loss = 3.9085e-01, PNorm = 62.3562, GNorm = 1.3260, lr_0 = 2.0338e-04
Loss = 3.9641e-01, PNorm = 62.3589, GNorm = 1.6011, lr_0 = 2.0324e-04
Loss = 3.1796e-01, PNorm = 62.3627, GNorm = 1.1067, lr_0 = 2.0310e-04
Loss = 3.5083e-01, PNorm = 62.3647, GNorm = 1.4720, lr_0 = 2.0296e-04
Loss = 3.6144e-01, PNorm = 62.3642, GNorm = 1.2610, lr_0 = 2.0282e-04
Loss = 3.4683e-01, PNorm = 62.3666, GNorm = 2.8967, lr_0 = 2.0268e-04
Loss = 3.2100e-01, PNorm = 62.3712, GNorm = 1.4241, lr_0 = 2.0254e-04
Loss = 3.2423e-01, PNorm = 62.3742, GNorm = 1.4050, lr_0 = 2.0240e-04
Loss = 3.6554e-01, PNorm = 62.3751, GNorm = 1.4029, lr_0 = 2.0227e-04
Loss = 3.4506e-01, PNorm = 62.3774, GNorm = 1.2325, lr_0 = 2.0213e-04
Loss = 3.5975e-01, PNorm = 62.3791, GNorm = 1.8943, lr_0 = 2.0199e-04
Loss = 4.0479e-01, PNorm = 62.3799, GNorm = 1.8557, lr_0 = 2.0185e-04
Loss = 3.6834e-01, PNorm = 62.3824, GNorm = 1.2565, lr_0 = 2.0171e-04
Loss = 3.9387e-01, PNorm = 62.3865, GNorm = 1.3217, lr_0 = 2.0157e-04
Loss = 3.8249e-01, PNorm = 62.3877, GNorm = 1.3769, lr_0 = 2.0144e-04
Loss = 3.9978e-01, PNorm = 62.3931, GNorm = 1.4312, lr_0 = 2.0130e-04
Loss = 3.6415e-01, PNorm = 62.3966, GNorm = 1.1855, lr_0 = 2.0116e-04
Loss = 4.1085e-01, PNorm = 62.4012, GNorm = 1.5088, lr_0 = 2.0102e-04
Loss = 3.0610e-01, PNorm = 62.4028, GNorm = 1.0090, lr_0 = 2.0088e-04
Loss = 3.7312e-01, PNorm = 62.4014, GNorm = 1.6342, lr_0 = 2.0075e-04
Loss = 3.8283e-01, PNorm = 62.3997, GNorm = 1.7441, lr_0 = 2.0061e-04
Loss = 3.1378e-01, PNorm = 62.4024, GNorm = 1.0497, lr_0 = 2.0047e-04
Loss = 3.9318e-01, PNorm = 62.4041, GNorm = 2.4735, lr_0 = 2.0033e-04
Loss = 3.7051e-01, PNorm = 62.4073, GNorm = 0.9929, lr_0 = 2.0020e-04
Loss = 3.3581e-01, PNorm = 62.4106, GNorm = 1.3163, lr_0 = 2.0006e-04
Loss = 3.0081e-01, PNorm = 62.4121, GNorm = 1.2479, lr_0 = 1.9992e-04
Loss = 3.1662e-01, PNorm = 62.4122, GNorm = 1.4801, lr_0 = 1.9979e-04
Loss = 3.8785e-01, PNorm = 62.4157, GNorm = 1.0301, lr_0 = 1.9965e-04
Loss = 3.5376e-01, PNorm = 62.4188, GNorm = 1.8372, lr_0 = 1.9951e-04
Loss = 3.1439e-01, PNorm = 62.4221, GNorm = 1.3115, lr_0 = 1.9938e-04
Loss = 3.4815e-01, PNorm = 62.4234, GNorm = 2.0239, lr_0 = 1.9924e-04
Loss = 3.6004e-01, PNorm = 62.4249, GNorm = 1.5071, lr_0 = 1.9910e-04
Loss = 3.1709e-01, PNorm = 62.4267, GNorm = 1.1806, lr_0 = 1.9897e-04
Loss = 3.7737e-01, PNorm = 62.4286, GNorm = 1.2260, lr_0 = 1.9883e-04
Loss = 3.9633e-01, PNorm = 62.4302, GNorm = 1.3963, lr_0 = 1.9869e-04
Loss = 3.5197e-01, PNorm = 62.4339, GNorm = 1.4231, lr_0 = 1.9856e-04
Loss = 3.7270e-01, PNorm = 62.4361, GNorm = 1.0142, lr_0 = 1.9842e-04
Loss = 3.4051e-01, PNorm = 62.4355, GNorm = 1.4334, lr_0 = 1.9829e-04
Loss = 3.4103e-01, PNorm = 62.4356, GNorm = 1.3777, lr_0 = 1.9815e-04
Loss = 4.2523e-01, PNorm = 62.4405, GNorm = 1.0215, lr_0 = 1.9801e-04
Loss = 3.4456e-01, PNorm = 62.4435, GNorm = 1.6416, lr_0 = 1.9788e-04
Loss = 4.0299e-01, PNorm = 62.4456, GNorm = 1.9509, lr_0 = 1.9774e-04
Loss = 3.6327e-01, PNorm = 62.4516, GNorm = 1.5488, lr_0 = 1.9761e-04
Loss = 3.5635e-01, PNorm = 62.4539, GNorm = 1.7336, lr_0 = 1.9747e-04
Loss = 3.4685e-01, PNorm = 62.4569, GNorm = 1.4727, lr_0 = 1.9734e-04
Loss = 3.3346e-01, PNorm = 62.4576, GNorm = 1.0709, lr_0 = 1.9720e-04
Loss = 3.0973e-01, PNorm = 62.4600, GNorm = 1.3098, lr_0 = 1.9707e-04
Loss = 3.6519e-01, PNorm = 62.4632, GNorm = 1.3931, lr_0 = 1.9693e-04
Loss = 3.8168e-01, PNorm = 62.4635, GNorm = 2.0298, lr_0 = 1.9680e-04
Loss = 3.8127e-01, PNorm = 62.4678, GNorm = 1.2282, lr_0 = 1.9666e-04
Loss = 4.1468e-01, PNorm = 62.4699, GNorm = 1.2664, lr_0 = 1.9653e-04
Loss = 3.6475e-01, PNorm = 62.4706, GNorm = 2.0871, lr_0 = 1.9639e-04
Loss = 3.7955e-01, PNorm = 62.4718, GNorm = 1.8475, lr_0 = 1.9626e-04
Loss = 3.7671e-01, PNorm = 62.4749, GNorm = 1.5793, lr_0 = 1.9612e-04
Loss = 3.6802e-01, PNorm = 62.4735, GNorm = 1.6633, lr_0 = 1.9599e-04
Loss = 3.9924e-01, PNorm = 62.4751, GNorm = 1.2680, lr_0 = 1.9585e-04
Loss = 3.4440e-01, PNorm = 62.4767, GNorm = 1.4595, lr_0 = 1.9572e-04
Loss = 4.0255e-01, PNorm = 62.4800, GNorm = 1.1590, lr_0 = 1.9559e-04
Loss = 3.7329e-01, PNorm = 62.4808, GNorm = 1.2872, lr_0 = 1.9545e-04
Loss = 4.2316e-01, PNorm = 62.4828, GNorm = 1.8890, lr_0 = 1.9532e-04
Loss = 3.7556e-01, PNorm = 62.4871, GNorm = 0.9858, lr_0 = 1.9518e-04
Loss = 3.7137e-01, PNorm = 62.4921, GNorm = 1.4119, lr_0 = 1.9505e-04
Loss = 4.4935e-01, PNorm = 62.4933, GNorm = 1.8957, lr_0 = 1.9492e-04
Loss = 4.1475e-01, PNorm = 62.4935, GNorm = 1.2560, lr_0 = 1.9478e-04
Loss = 4.3804e-01, PNorm = 62.4970, GNorm = 1.1941, lr_0 = 1.9465e-04
Loss = 3.6806e-01, PNorm = 62.4970, GNorm = 1.2875, lr_0 = 1.9452e-04
Loss = 3.6675e-01, PNorm = 62.4989, GNorm = 1.2700, lr_0 = 1.9438e-04
Loss = 3.9774e-01, PNorm = 62.5011, GNorm = 2.1629, lr_0 = 1.9425e-04
Loss = 3.5671e-01, PNorm = 62.5044, GNorm = 2.0710, lr_0 = 1.9412e-04
Loss = 3.6482e-01, PNorm = 62.5055, GNorm = 1.7039, lr_0 = 1.9398e-04
Loss = 3.2031e-01, PNorm = 62.5053, GNorm = 0.9607, lr_0 = 1.9385e-04
Loss = 3.6337e-01, PNorm = 62.5064, GNorm = 1.3965, lr_0 = 1.9372e-04
Loss = 4.5047e-01, PNorm = 62.5073, GNorm = 1.7484, lr_0 = 1.9359e-04
Loss = 3.5258e-01, PNorm = 62.5121, GNorm = 1.3570, lr_0 = 1.9345e-04
Loss = 3.8169e-01, PNorm = 62.5141, GNorm = 1.2826, lr_0 = 1.9332e-04
Loss = 3.5951e-01, PNorm = 62.5157, GNorm = 1.7575, lr_0 = 1.9319e-04
Loss = 3.4754e-01, PNorm = 62.5185, GNorm = 1.1254, lr_0 = 1.9306e-04
Validation mae = 0.111262
Epoch 22
Loss = 3.1507e-01, PNorm = 62.5187, GNorm = 0.9789, lr_0 = 1.9292e-04
Loss = 3.5235e-01, PNorm = 62.5183, GNorm = 1.4228, lr_0 = 1.9279e-04
Loss = 4.2104e-01, PNorm = 62.5216, GNorm = 1.4696, lr_0 = 1.9266e-04
Loss = 3.5350e-01, PNorm = 62.5250, GNorm = 1.1908, lr_0 = 1.9253e-04
Loss = 3.7922e-01, PNorm = 62.5278, GNorm = 1.1606, lr_0 = 1.9240e-04
Loss = 3.9470e-01, PNorm = 62.5291, GNorm = 1.5550, lr_0 = 1.9226e-04
Loss = 3.9495e-01, PNorm = 62.5331, GNorm = 1.3673, lr_0 = 1.9213e-04
Loss = 3.4971e-01, PNorm = 62.5359, GNorm = 1.4865, lr_0 = 1.9200e-04
Loss = 3.2605e-01, PNorm = 62.5378, GNorm = 1.7556, lr_0 = 1.9187e-04
Loss = 4.0193e-01, PNorm = 62.5385, GNorm = 1.5455, lr_0 = 1.9174e-04
Loss = 3.6942e-01, PNorm = 62.5378, GNorm = 1.6293, lr_0 = 1.9161e-04
Loss = 3.4275e-01, PNorm = 62.5394, GNorm = 1.4340, lr_0 = 1.9148e-04
Loss = 3.8930e-01, PNorm = 62.5432, GNorm = 1.0154, lr_0 = 1.9134e-04
Loss = 3.4865e-01, PNorm = 62.5446, GNorm = 1.3641, lr_0 = 1.9121e-04
Loss = 3.4404e-01, PNorm = 62.5459, GNorm = 2.0559, lr_0 = 1.9108e-04
Loss = 3.3677e-01, PNorm = 62.5495, GNorm = 2.0474, lr_0 = 1.9095e-04
Loss = 3.4957e-01, PNorm = 62.5502, GNorm = 1.4275, lr_0 = 1.9082e-04
Loss = 3.2767e-01, PNorm = 62.5531, GNorm = 1.2953, lr_0 = 1.9069e-04
Loss = 3.4088e-01, PNorm = 62.5564, GNorm = 2.0413, lr_0 = 1.9056e-04
Loss = 3.4595e-01, PNorm = 62.5568, GNorm = 1.5169, lr_0 = 1.9043e-04
Loss = 3.5424e-01, PNorm = 62.5604, GNorm = 1.2401, lr_0 = 1.9030e-04
Loss = 3.4009e-01, PNorm = 62.5635, GNorm = 1.8033, lr_0 = 1.9017e-04
Loss = 3.2513e-01, PNorm = 62.5668, GNorm = 1.2084, lr_0 = 1.9004e-04
Loss = 3.8115e-01, PNorm = 62.5697, GNorm = 1.9392, lr_0 = 1.8991e-04
Loss = 3.2362e-01, PNorm = 62.5709, GNorm = 1.1364, lr_0 = 1.8978e-04
Loss = 3.6215e-01, PNorm = 62.5731, GNorm = 1.1560, lr_0 = 1.8965e-04
Loss = 3.0713e-01, PNorm = 62.5755, GNorm = 1.2996, lr_0 = 1.8952e-04
Loss = 3.4805e-01, PNorm = 62.5767, GNorm = 1.3546, lr_0 = 1.8939e-04
Loss = 3.6221e-01, PNorm = 62.5790, GNorm = 1.0664, lr_0 = 1.8926e-04
Loss = 3.7215e-01, PNorm = 62.5790, GNorm = 1.2576, lr_0 = 1.8913e-04
Loss = 3.9553e-01, PNorm = 62.5775, GNorm = 1.4374, lr_0 = 1.8900e-04
Loss = 4.0594e-01, PNorm = 62.5772, GNorm = 1.5588, lr_0 = 1.8887e-04
Loss = 3.3446e-01, PNorm = 62.5799, GNorm = 1.4901, lr_0 = 1.8874e-04
Loss = 3.3589e-01, PNorm = 62.5788, GNorm = 1.3000, lr_0 = 1.8861e-04
Loss = 3.9161e-01, PNorm = 62.5802, GNorm = 1.8303, lr_0 = 1.8848e-04
Loss = 3.4191e-01, PNorm = 62.5804, GNorm = 1.3554, lr_0 = 1.8835e-04
Loss = 4.0119e-01, PNorm = 62.5787, GNorm = 1.2371, lr_0 = 1.8822e-04
Loss = 3.6814e-01, PNorm = 62.5811, GNorm = 1.4645, lr_0 = 1.8809e-04
Loss = 3.2258e-01, PNorm = 62.5850, GNorm = 2.2671, lr_0 = 1.8797e-04
Loss = 4.9133e-01, PNorm = 62.5847, GNorm = 1.6844, lr_0 = 1.8784e-04
Loss = 3.6305e-01, PNorm = 62.5880, GNorm = 1.1705, lr_0 = 1.8771e-04
Loss = 3.6897e-01, PNorm = 62.5929, GNorm = 1.3894, lr_0 = 1.8758e-04
Loss = 4.0002e-01, PNorm = 62.5947, GNorm = 1.4820, lr_0 = 1.8745e-04
Loss = 3.0731e-01, PNorm = 62.5989, GNorm = 1.7257, lr_0 = 1.8732e-04
Loss = 3.4178e-01, PNorm = 62.6047, GNorm = 0.8536, lr_0 = 1.8719e-04
Loss = 3.6135e-01, PNorm = 62.6069, GNorm = 2.0009, lr_0 = 1.8707e-04
Loss = 3.5605e-01, PNorm = 62.6100, GNorm = 1.5720, lr_0 = 1.8694e-04
Loss = 3.1021e-01, PNorm = 62.6141, GNorm = 1.2186, lr_0 = 1.8681e-04
Loss = 4.2403e-01, PNorm = 62.6143, GNorm = 1.2074, lr_0 = 1.8668e-04
Loss = 3.6518e-01, PNorm = 62.6138, GNorm = 1.6953, lr_0 = 1.8655e-04
Loss = 3.0480e-01, PNorm = 62.6154, GNorm = 1.6245, lr_0 = 1.8643e-04
Loss = 3.2810e-01, PNorm = 62.6169, GNorm = 1.6389, lr_0 = 1.8630e-04
Loss = 3.6255e-01, PNorm = 62.6228, GNorm = 1.7592, lr_0 = 1.8617e-04
Loss = 3.3246e-01, PNorm = 62.6226, GNorm = 1.5582, lr_0 = 1.8604e-04
Loss = 3.3038e-01, PNorm = 62.6257, GNorm = 1.6363, lr_0 = 1.8592e-04
Loss = 2.9758e-01, PNorm = 62.6305, GNorm = 1.0422, lr_0 = 1.8579e-04
Loss = 2.9719e-01, PNorm = 62.6337, GNorm = 1.6138, lr_0 = 1.8566e-04
Loss = 3.6287e-01, PNorm = 62.6355, GNorm = 1.2539, lr_0 = 1.8553e-04
Loss = 3.6631e-01, PNorm = 62.6380, GNorm = 1.1034, lr_0 = 1.8541e-04
Loss = 3.6250e-01, PNorm = 62.6367, GNorm = 1.0281, lr_0 = 1.8528e-04
Loss = 3.8181e-01, PNorm = 62.6371, GNorm = 1.3412, lr_0 = 1.8515e-04
Loss = 3.5155e-01, PNorm = 62.6413, GNorm = 2.2876, lr_0 = 1.8503e-04
Loss = 3.9880e-01, PNorm = 62.6419, GNorm = 1.3627, lr_0 = 1.8490e-04
Loss = 3.6839e-01, PNorm = 62.6442, GNorm = 1.1171, lr_0 = 1.8477e-04
Loss = 3.8144e-01, PNorm = 62.6478, GNorm = 2.0099, lr_0 = 1.8465e-04
Loss = 3.8049e-01, PNorm = 62.6498, GNorm = 1.3355, lr_0 = 1.8452e-04
Loss = 3.3592e-01, PNorm = 62.6525, GNorm = 1.1896, lr_0 = 1.8439e-04
Loss = 3.2297e-01, PNorm = 62.6550, GNorm = 1.8842, lr_0 = 1.8427e-04
Loss = 3.6882e-01, PNorm = 62.6563, GNorm = 1.7581, lr_0 = 1.8414e-04
Loss = 3.1714e-01, PNorm = 62.6564, GNorm = 1.4191, lr_0 = 1.8401e-04
Loss = 3.3026e-01, PNorm = 62.6569, GNorm = 1.7971, lr_0 = 1.8389e-04
Loss = 3.5889e-01, PNorm = 62.6582, GNorm = 1.4266, lr_0 = 1.8376e-04
Loss = 3.5396e-01, PNorm = 62.6614, GNorm = 1.6995, lr_0 = 1.8364e-04
Loss = 3.6669e-01, PNorm = 62.6631, GNorm = 2.0808, lr_0 = 1.8351e-04
Loss = 3.7843e-01, PNorm = 62.6657, GNorm = 1.3785, lr_0 = 1.8338e-04
Loss = 3.3187e-01, PNorm = 62.6657, GNorm = 1.3564, lr_0 = 1.8326e-04
Loss = 3.5043e-01, PNorm = 62.6683, GNorm = 2.8590, lr_0 = 1.8313e-04
Loss = 3.4200e-01, PNorm = 62.6681, GNorm = 1.2771, lr_0 = 1.8301e-04
Loss = 3.6377e-01, PNorm = 62.6678, GNorm = 1.4513, lr_0 = 1.8288e-04
Loss = 3.8286e-01, PNorm = 62.6679, GNorm = 1.3388, lr_0 = 1.8276e-04
Loss = 3.6324e-01, PNorm = 62.6696, GNorm = 1.2121, lr_0 = 1.8263e-04
Loss = 3.6545e-01, PNorm = 62.6718, GNorm = 1.4691, lr_0 = 1.8251e-04
Loss = 3.4999e-01, PNorm = 62.6721, GNorm = 1.5621, lr_0 = 1.8238e-04
Loss = 4.1735e-01, PNorm = 62.6748, GNorm = 1.3724, lr_0 = 1.8226e-04
Loss = 3.6954e-01, PNorm = 62.6776, GNorm = 1.3218, lr_0 = 1.8213e-04
Loss = 3.3398e-01, PNorm = 62.6776, GNorm = 1.2040, lr_0 = 1.8201e-04
Loss = 3.9951e-01, PNorm = 62.6770, GNorm = 1.5753, lr_0 = 1.8188e-04
Loss = 3.7484e-01, PNorm = 62.6794, GNorm = 1.3195, lr_0 = 1.8176e-04
Loss = 3.3283e-01, PNorm = 62.6809, GNorm = 1.0925, lr_0 = 1.8163e-04
Loss = 3.8218e-01, PNorm = 62.6809, GNorm = 1.7958, lr_0 = 1.8151e-04
Loss = 3.6840e-01, PNorm = 62.6849, GNorm = 1.7864, lr_0 = 1.8138e-04
Loss = 3.2223e-01, PNorm = 62.6905, GNorm = 1.2638, lr_0 = 1.8126e-04
Loss = 3.8976e-01, PNorm = 62.6915, GNorm = 1.6339, lr_0 = 1.8114e-04
Loss = 3.1255e-01, PNorm = 62.6946, GNorm = 1.1716, lr_0 = 1.8101e-04
Loss = 3.2941e-01, PNorm = 62.6977, GNorm = 1.4634, lr_0 = 1.8089e-04
Loss = 3.0427e-01, PNorm = 62.6994, GNorm = 1.4521, lr_0 = 1.8076e-04
Loss = 3.3483e-01, PNorm = 62.6998, GNorm = 1.6676, lr_0 = 1.8064e-04
Loss = 3.8555e-01, PNorm = 62.7041, GNorm = 1.4911, lr_0 = 1.8052e-04
Loss = 3.3610e-01, PNorm = 62.7073, GNorm = 1.3187, lr_0 = 1.8039e-04
Loss = 3.4515e-01, PNorm = 62.7107, GNorm = 1.2461, lr_0 = 1.8027e-04
Loss = 3.6787e-01, PNorm = 62.7130, GNorm = 2.0814, lr_0 = 1.8015e-04
Loss = 3.5416e-01, PNorm = 62.7141, GNorm = 1.7700, lr_0 = 1.8002e-04
Loss = 3.5186e-01, PNorm = 62.7136, GNorm = 1.9245, lr_0 = 1.7990e-04
Loss = 3.3615e-01, PNorm = 62.7157, GNorm = 1.9802, lr_0 = 1.7978e-04
Loss = 3.7585e-01, PNorm = 62.7154, GNorm = 2.2557, lr_0 = 1.7965e-04
Loss = 3.8644e-01, PNorm = 62.7189, GNorm = 1.4557, lr_0 = 1.7953e-04
Loss = 3.8182e-01, PNorm = 62.7197, GNorm = 1.2676, lr_0 = 1.7941e-04
Loss = 3.8859e-01, PNorm = 62.7232, GNorm = 1.4545, lr_0 = 1.7928e-04
Loss = 3.8363e-01, PNorm = 62.7247, GNorm = 1.0090, lr_0 = 1.7916e-04
Loss = 3.8533e-01, PNorm = 62.7271, GNorm = 1.4955, lr_0 = 1.7904e-04
Loss = 4.0676e-01, PNorm = 62.7284, GNorm = 1.2900, lr_0 = 1.7892e-04
Loss = 3.5127e-01, PNorm = 62.7308, GNorm = 1.3535, lr_0 = 1.7879e-04
Loss = 3.3082e-01, PNorm = 62.7344, GNorm = 1.9489, lr_0 = 1.7867e-04
Loss = 4.0453e-01, PNorm = 62.7375, GNorm = 1.3747, lr_0 = 1.7855e-04
Loss = 3.0463e-01, PNorm = 62.7414, GNorm = 1.3954, lr_0 = 1.7843e-04
Loss = 3.7250e-01, PNorm = 62.7433, GNorm = 1.2786, lr_0 = 1.7830e-04
Loss = 3.7943e-01, PNorm = 62.7443, GNorm = 1.7414, lr_0 = 1.7818e-04
Loss = 3.3952e-01, PNorm = 62.7435, GNorm = 1.1819, lr_0 = 1.7806e-04
Loss = 3.4082e-01, PNorm = 62.7443, GNorm = 1.3559, lr_0 = 1.7794e-04
Loss = 4.2190e-01, PNorm = 62.7450, GNorm = 1.4492, lr_0 = 1.7782e-04
Validation mae = 0.111805
Epoch 23
Loss = 4.1822e-01, PNorm = 62.7474, GNorm = 1.9036, lr_0 = 1.7769e-04
Loss = 3.8899e-01, PNorm = 62.7503, GNorm = 1.2282, lr_0 = 1.7757e-04
Loss = 3.3307e-01, PNorm = 62.7481, GNorm = 1.6388, lr_0 = 1.7745e-04
Loss = 3.3181e-01, PNorm = 62.7499, GNorm = 1.8219, lr_0 = 1.7733e-04
Loss = 3.4508e-01, PNorm = 62.7528, GNorm = 1.0931, lr_0 = 1.7721e-04
Loss = 3.5334e-01, PNorm = 62.7539, GNorm = 1.5135, lr_0 = 1.7709e-04
Loss = 3.4072e-01, PNorm = 62.7563, GNorm = 1.5257, lr_0 = 1.7696e-04
Loss = 3.4874e-01, PNorm = 62.7572, GNorm = 1.3205, lr_0 = 1.7684e-04
Loss = 3.3846e-01, PNorm = 62.7574, GNorm = 1.1901, lr_0 = 1.7672e-04
Loss = 3.3464e-01, PNorm = 62.7595, GNorm = 1.7424, lr_0 = 1.7660e-04
Loss = 3.6678e-01, PNorm = 62.7616, GNorm = 1.5012, lr_0 = 1.7648e-04
Loss = 3.8906e-01, PNorm = 62.7636, GNorm = 1.2376, lr_0 = 1.7636e-04
Loss = 3.8660e-01, PNorm = 62.7628, GNorm = 1.5065, lr_0 = 1.7624e-04
Loss = 3.4057e-01, PNorm = 62.7631, GNorm = 1.2471, lr_0 = 1.7612e-04
Loss = 3.4745e-01, PNorm = 62.7655, GNorm = 1.4485, lr_0 = 1.7600e-04
Loss = 3.3047e-01, PNorm = 62.7696, GNorm = 1.3184, lr_0 = 1.7588e-04
Loss = 3.6421e-01, PNorm = 62.7733, GNorm = 1.1182, lr_0 = 1.7576e-04
Loss = 3.8517e-01, PNorm = 62.7740, GNorm = 1.6824, lr_0 = 1.7564e-04
Loss = 3.3681e-01, PNorm = 62.7765, GNorm = 1.1141, lr_0 = 1.7552e-04
Loss = 3.5369e-01, PNorm = 62.7803, GNorm = 0.9850, lr_0 = 1.7540e-04
Loss = 4.0834e-01, PNorm = 62.7816, GNorm = 1.2416, lr_0 = 1.7528e-04
Loss = 3.4022e-01, PNorm = 62.7811, GNorm = 2.2138, lr_0 = 1.7516e-04
Loss = 3.8342e-01, PNorm = 62.7854, GNorm = 1.4503, lr_0 = 1.7504e-04
Loss = 3.5148e-01, PNorm = 62.7876, GNorm = 1.6784, lr_0 = 1.7492e-04
Loss = 3.9868e-01, PNorm = 62.7891, GNorm = 1.8693, lr_0 = 1.7480e-04
Loss = 3.1081e-01, PNorm = 62.7923, GNorm = 1.4073, lr_0 = 1.7468e-04
Loss = 3.7655e-01, PNorm = 62.7946, GNorm = 1.8574, lr_0 = 1.7456e-04
Loss = 3.6071e-01, PNorm = 62.7973, GNorm = 1.3370, lr_0 = 1.7444e-04
Loss = 3.7491e-01, PNorm = 62.8012, GNorm = 1.3270, lr_0 = 1.7432e-04
Loss = 3.4717e-01, PNorm = 62.8048, GNorm = 1.4280, lr_0 = 1.7420e-04
Loss = 3.2716e-01, PNorm = 62.8087, GNorm = 1.0698, lr_0 = 1.7408e-04
Loss = 3.3122e-01, PNorm = 62.8125, GNorm = 1.4315, lr_0 = 1.7396e-04
Loss = 3.6588e-01, PNorm = 62.8144, GNorm = 1.2082, lr_0 = 1.7384e-04
Loss = 3.6214e-01, PNorm = 62.8165, GNorm = 1.7189, lr_0 = 1.7372e-04
Loss = 2.9691e-01, PNorm = 62.8185, GNorm = 1.2955, lr_0 = 1.7360e-04
Loss = 3.4443e-01, PNorm = 62.8169, GNorm = 1.5237, lr_0 = 1.7348e-04
Loss = 3.2496e-01, PNorm = 62.8167, GNorm = 1.3471, lr_0 = 1.7336e-04
Loss = 3.7264e-01, PNorm = 62.8194, GNorm = 1.6817, lr_0 = 1.7325e-04
Loss = 3.5518e-01, PNorm = 62.8229, GNorm = 1.4361, lr_0 = 1.7313e-04
Loss = 3.7473e-01, PNorm = 62.8227, GNorm = 1.0179, lr_0 = 1.7301e-04
Loss = 3.2960e-01, PNorm = 62.8242, GNorm = 1.2502, lr_0 = 1.7289e-04
Loss = 3.6384e-01, PNorm = 62.8288, GNorm = 1.3854, lr_0 = 1.7277e-04
Loss = 3.7004e-01, PNorm = 62.8340, GNorm = 1.0874, lr_0 = 1.7265e-04
Loss = 3.7288e-01, PNorm = 62.8376, GNorm = 1.6580, lr_0 = 1.7253e-04
Loss = 3.9037e-01, PNorm = 62.8407, GNorm = 1.5390, lr_0 = 1.7242e-04
Loss = 3.8287e-01, PNorm = 62.8407, GNorm = 1.6335, lr_0 = 1.7230e-04
Loss = 3.6674e-01, PNorm = 62.8420, GNorm = 2.3073, lr_0 = 1.7218e-04
Loss = 3.2206e-01, PNorm = 62.8419, GNorm = 0.8936, lr_0 = 1.7206e-04
Loss = 3.6355e-01, PNorm = 62.8449, GNorm = 1.5626, lr_0 = 1.7194e-04
Loss = 3.4982e-01, PNorm = 62.8492, GNorm = 1.4311, lr_0 = 1.7183e-04
Loss = 3.2056e-01, PNorm = 62.8520, GNorm = 1.4523, lr_0 = 1.7171e-04
Loss = 3.2856e-01, PNorm = 62.8536, GNorm = 0.9255, lr_0 = 1.7159e-04
Loss = 3.8068e-01, PNorm = 62.8532, GNorm = 1.7742, lr_0 = 1.7147e-04
Loss = 3.4530e-01, PNorm = 62.8520, GNorm = 1.4040, lr_0 = 1.7136e-04
Loss = 3.4529e-01, PNorm = 62.8529, GNorm = 1.8679, lr_0 = 1.7124e-04
Loss = 3.4371e-01, PNorm = 62.8535, GNorm = 1.2959, lr_0 = 1.7112e-04
Loss = 3.4739e-01, PNorm = 62.8548, GNorm = 1.3169, lr_0 = 1.7100e-04
Loss = 3.3906e-01, PNorm = 62.8546, GNorm = 1.2919, lr_0 = 1.7089e-04
Loss = 3.3463e-01, PNorm = 62.8558, GNorm = 1.1235, lr_0 = 1.7077e-04
Loss = 3.6451e-01, PNorm = 62.8587, GNorm = 1.0616, lr_0 = 1.7065e-04
Loss = 3.2659e-01, PNorm = 62.8610, GNorm = 1.5397, lr_0 = 1.7054e-04
Loss = 3.1601e-01, PNorm = 62.8616, GNorm = 1.4250, lr_0 = 1.7042e-04
Loss = 3.6526e-01, PNorm = 62.8631, GNorm = 1.4146, lr_0 = 1.7030e-04
Loss = 3.0998e-01, PNorm = 62.8652, GNorm = 1.2289, lr_0 = 1.7019e-04
Loss = 3.5125e-01, PNorm = 62.8660, GNorm = 1.3906, lr_0 = 1.7007e-04
Loss = 3.8862e-01, PNorm = 62.8669, GNorm = 1.3551, lr_0 = 1.6995e-04
Loss = 3.2674e-01, PNorm = 62.8699, GNorm = 1.4211, lr_0 = 1.6984e-04
Loss = 3.5446e-01, PNorm = 62.8701, GNorm = 1.2976, lr_0 = 1.6972e-04
Loss = 3.6667e-01, PNorm = 62.8713, GNorm = 1.2070, lr_0 = 1.6960e-04
Loss = 3.4096e-01, PNorm = 62.8751, GNorm = 0.8990, lr_0 = 1.6949e-04
Loss = 3.4009e-01, PNorm = 62.8781, GNorm = 1.7338, lr_0 = 1.6937e-04
Loss = 3.8173e-01, PNorm = 62.8784, GNorm = 1.3400, lr_0 = 1.6926e-04
Loss = 3.8823e-01, PNorm = 62.8791, GNorm = 1.3261, lr_0 = 1.6914e-04
Loss = 3.3079e-01, PNorm = 62.8828, GNorm = 1.3890, lr_0 = 1.6902e-04
Loss = 3.5135e-01, PNorm = 62.8841, GNorm = 1.3774, lr_0 = 1.6891e-04
Loss = 3.2402e-01, PNorm = 62.8858, GNorm = 1.3866, lr_0 = 1.6879e-04
Loss = 3.7352e-01, PNorm = 62.8877, GNorm = 1.2186, lr_0 = 1.6868e-04
Loss = 3.6324e-01, PNorm = 62.8892, GNorm = 1.2227, lr_0 = 1.6856e-04
Loss = 3.2141e-01, PNorm = 62.8905, GNorm = 1.1985, lr_0 = 1.6845e-04
Loss = 3.2509e-01, PNorm = 62.8937, GNorm = 1.6683, lr_0 = 1.6833e-04
Loss = 3.7195e-01, PNorm = 62.8937, GNorm = 1.5544, lr_0 = 1.6821e-04
Loss = 4.2538e-01, PNorm = 62.8965, GNorm = 1.4443, lr_0 = 1.6810e-04
Loss = 3.5246e-01, PNorm = 62.9008, GNorm = 1.1689, lr_0 = 1.6798e-04
Loss = 3.7777e-01, PNorm = 62.9011, GNorm = 1.7966, lr_0 = 1.6787e-04
Loss = 3.8853e-01, PNorm = 62.9024, GNorm = 1.7158, lr_0 = 1.6775e-04
Loss = 4.0289e-01, PNorm = 62.9063, GNorm = 1.5695, lr_0 = 1.6764e-04
Loss = 3.1962e-01, PNorm = 62.9052, GNorm = 1.8303, lr_0 = 1.6752e-04
Loss = 3.1598e-01, PNorm = 62.9059, GNorm = 1.4212, lr_0 = 1.6741e-04
Loss = 3.9987e-01, PNorm = 62.9102, GNorm = 1.5463, lr_0 = 1.6729e-04
Loss = 3.5658e-01, PNorm = 62.9112, GNorm = 1.5800, lr_0 = 1.6718e-04
Loss = 3.6564e-01, PNorm = 62.9095, GNorm = 1.2619, lr_0 = 1.6707e-04
Loss = 3.6036e-01, PNorm = 62.9102, GNorm = 1.3043, lr_0 = 1.6695e-04
Loss = 3.7797e-01, PNorm = 62.9120, GNorm = 1.4464, lr_0 = 1.6684e-04
Loss = 4.1096e-01, PNorm = 62.9143, GNorm = 2.3863, lr_0 = 1.6672e-04
Loss = 4.0060e-01, PNorm = 62.9154, GNorm = 1.2909, lr_0 = 1.6661e-04
Loss = 3.6146e-01, PNorm = 62.9156, GNorm = 0.8598, lr_0 = 1.6649e-04
Loss = 3.1695e-01, PNorm = 62.9186, GNorm = 1.1829, lr_0 = 1.6638e-04
Loss = 3.7185e-01, PNorm = 62.9181, GNorm = 1.0059, lr_0 = 1.6627e-04
Loss = 3.5982e-01, PNorm = 62.9197, GNorm = 1.6470, lr_0 = 1.6615e-04
Loss = 3.6967e-01, PNorm = 62.9231, GNorm = 2.4492, lr_0 = 1.6604e-04
Loss = 3.7932e-01, PNorm = 62.9233, GNorm = 1.4295, lr_0 = 1.6592e-04
Loss = 3.3224e-01, PNorm = 62.9230, GNorm = 1.7275, lr_0 = 1.6581e-04
Loss = 3.6807e-01, PNorm = 62.9250, GNorm = 1.8835, lr_0 = 1.6570e-04
Loss = 3.4211e-01, PNorm = 62.9244, GNorm = 1.6619, lr_0 = 1.6558e-04
Loss = 3.5987e-01, PNorm = 62.9247, GNorm = 1.9078, lr_0 = 1.6547e-04
Loss = 3.3874e-01, PNorm = 62.9261, GNorm = 1.9754, lr_0 = 1.6536e-04
Loss = 2.9676e-01, PNorm = 62.9278, GNorm = 1.6207, lr_0 = 1.6524e-04
Loss = 3.6430e-01, PNorm = 62.9305, GNorm = 2.1083, lr_0 = 1.6513e-04
Loss = 3.0700e-01, PNorm = 62.9311, GNorm = 1.1268, lr_0 = 1.6502e-04
Loss = 3.8379e-01, PNorm = 62.9317, GNorm = 2.2245, lr_0 = 1.6490e-04
Loss = 4.0616e-01, PNorm = 62.9335, GNorm = 1.5510, lr_0 = 1.6479e-04
Loss = 4.1090e-01, PNorm = 62.9366, GNorm = 1.3977, lr_0 = 1.6468e-04
Loss = 4.1420e-01, PNorm = 62.9390, GNorm = 1.3346, lr_0 = 1.6457e-04
Loss = 3.2803e-01, PNorm = 62.9429, GNorm = 1.3940, lr_0 = 1.6445e-04
Loss = 3.9267e-01, PNorm = 62.9432, GNorm = 1.1267, lr_0 = 1.6434e-04
Loss = 3.5159e-01, PNorm = 62.9431, GNorm = 1.4572, lr_0 = 1.6423e-04
Loss = 3.3126e-01, PNorm = 62.9441, GNorm = 1.1312, lr_0 = 1.6412e-04
Loss = 3.9368e-01, PNorm = 62.9442, GNorm = 2.0887, lr_0 = 1.6400e-04
Loss = 3.4775e-01, PNorm = 62.9487, GNorm = 1.4038, lr_0 = 1.6389e-04
Loss = 3.5703e-01, PNorm = 62.9509, GNorm = 1.7393, lr_0 = 1.6378e-04
Validation mae = 0.111421
Epoch 24
Loss = 3.4715e-01, PNorm = 62.9534, GNorm = 0.8403, lr_0 = 1.6367e-04
Loss = 3.2571e-01, PNorm = 62.9551, GNorm = 1.1145, lr_0 = 1.6355e-04
Loss = 3.5634e-01, PNorm = 62.9568, GNorm = 1.3858, lr_0 = 1.6344e-04
Loss = 3.4503e-01, PNorm = 62.9587, GNorm = 1.3597, lr_0 = 1.6333e-04
Loss = 3.2992e-01, PNorm = 62.9599, GNorm = 1.2411, lr_0 = 1.6322e-04
Loss = 3.3676e-01, PNorm = 62.9595, GNorm = 1.3136, lr_0 = 1.6311e-04
Loss = 3.7131e-01, PNorm = 62.9602, GNorm = 1.4108, lr_0 = 1.6299e-04
Loss = 3.3159e-01, PNorm = 62.9638, GNorm = 1.5716, lr_0 = 1.6288e-04
Loss = 3.5314e-01, PNorm = 62.9644, GNorm = 0.9265, lr_0 = 1.6277e-04
Loss = 3.5465e-01, PNorm = 62.9635, GNorm = 1.5854, lr_0 = 1.6266e-04
Loss = 3.4357e-01, PNorm = 62.9662, GNorm = 1.1463, lr_0 = 1.6255e-04
Loss = 3.8851e-01, PNorm = 62.9676, GNorm = 2.1672, lr_0 = 1.6244e-04
Loss = 3.4061e-01, PNorm = 62.9681, GNorm = 1.4972, lr_0 = 1.6233e-04
Loss = 3.6726e-01, PNorm = 62.9700, GNorm = 1.6037, lr_0 = 1.6221e-04
Loss = 3.5759e-01, PNorm = 62.9711, GNorm = 1.6457, lr_0 = 1.6210e-04
Loss = 3.2198e-01, PNorm = 62.9723, GNorm = 1.8766, lr_0 = 1.6199e-04
Loss = 4.0466e-01, PNorm = 62.9759, GNorm = 1.6797, lr_0 = 1.6188e-04
Loss = 3.5601e-01, PNorm = 62.9779, GNorm = 2.0693, lr_0 = 1.6177e-04
Loss = 4.0229e-01, PNorm = 62.9778, GNorm = 1.4352, lr_0 = 1.6166e-04
Loss = 3.5207e-01, PNorm = 62.9799, GNorm = 1.6385, lr_0 = 1.6155e-04
Loss = 2.9709e-01, PNorm = 62.9814, GNorm = 1.0602, lr_0 = 1.6144e-04
Loss = 3.6293e-01, PNorm = 62.9836, GNorm = 1.2513, lr_0 = 1.6133e-04
Loss = 3.7456e-01, PNorm = 62.9846, GNorm = 1.2359, lr_0 = 1.6122e-04
Loss = 3.6189e-01, PNorm = 62.9870, GNorm = 1.4803, lr_0 = 1.6111e-04
Loss = 3.2419e-01, PNorm = 62.9886, GNorm = 1.9990, lr_0 = 1.6100e-04
Loss = 3.5454e-01, PNorm = 62.9895, GNorm = 1.4658, lr_0 = 1.6089e-04
Loss = 3.0864e-01, PNorm = 62.9909, GNorm = 1.3410, lr_0 = 1.6078e-04
Loss = 3.5217e-01, PNorm = 62.9923, GNorm = 1.1077, lr_0 = 1.6067e-04
Loss = 3.7598e-01, PNorm = 62.9968, GNorm = 1.5089, lr_0 = 1.6056e-04
Loss = 4.0659e-01, PNorm = 63.0010, GNorm = 1.3161, lr_0 = 1.6045e-04
Loss = 3.4595e-01, PNorm = 63.0032, GNorm = 1.2459, lr_0 = 1.6034e-04
Loss = 3.5874e-01, PNorm = 63.0031, GNorm = 1.8005, lr_0 = 1.6023e-04
Loss = 3.5229e-01, PNorm = 63.0038, GNorm = 1.6052, lr_0 = 1.6012e-04
Loss = 3.1887e-01, PNorm = 63.0045, GNorm = 1.2088, lr_0 = 1.6001e-04
Loss = 3.7329e-01, PNorm = 63.0059, GNorm = 1.3976, lr_0 = 1.5990e-04
Loss = 3.9175e-01, PNorm = 63.0100, GNorm = 1.4648, lr_0 = 1.5979e-04
Loss = 3.3669e-01, PNorm = 63.0141, GNorm = 1.3584, lr_0 = 1.5968e-04
Loss = 3.3026e-01, PNorm = 63.0163, GNorm = 1.1722, lr_0 = 1.5957e-04
Loss = 3.6272e-01, PNorm = 63.0216, GNorm = 1.4048, lr_0 = 1.5946e-04
Loss = 3.6994e-01, PNorm = 63.0234, GNorm = 1.9056, lr_0 = 1.5935e-04
Loss = 3.7467e-01, PNorm = 63.0219, GNorm = 1.7023, lr_0 = 1.5924e-04
Loss = 3.4120e-01, PNorm = 63.0239, GNorm = 1.3269, lr_0 = 1.5913e-04
Loss = 3.0865e-01, PNorm = 63.0261, GNorm = 2.0624, lr_0 = 1.5902e-04
Loss = 3.6649e-01, PNorm = 63.0266, GNorm = 1.9569, lr_0 = 1.5891e-04
Loss = 3.3065e-01, PNorm = 63.0279, GNorm = 1.6177, lr_0 = 1.5880e-04
Loss = 3.0436e-01, PNorm = 63.0264, GNorm = 1.2405, lr_0 = 1.5870e-04
Loss = 3.6288e-01, PNorm = 63.0276, GNorm = 1.5372, lr_0 = 1.5859e-04
Loss = 3.6278e-01, PNorm = 63.0302, GNorm = 1.3244, lr_0 = 1.5848e-04
Loss = 3.4306e-01, PNorm = 63.0314, GNorm = 1.8101, lr_0 = 1.5837e-04
Loss = 3.5738e-01, PNorm = 63.0340, GNorm = 1.7255, lr_0 = 1.5826e-04
Loss = 3.0674e-01, PNorm = 63.0364, GNorm = 1.2394, lr_0 = 1.5815e-04
Loss = 3.6674e-01, PNorm = 63.0382, GNorm = 1.4707, lr_0 = 1.5804e-04
Loss = 3.3705e-01, PNorm = 63.0378, GNorm = 1.3486, lr_0 = 1.5794e-04
Loss = 3.6263e-01, PNorm = 63.0380, GNorm = 1.6979, lr_0 = 1.5783e-04
Loss = 3.1612e-01, PNorm = 63.0399, GNorm = 1.4405, lr_0 = 1.5772e-04
Loss = 3.6864e-01, PNorm = 63.0410, GNorm = 2.4593, lr_0 = 1.5761e-04
Loss = 2.9752e-01, PNorm = 63.0434, GNorm = 1.1956, lr_0 = 1.5750e-04
Loss = 3.9713e-01, PNorm = 63.0436, GNorm = 1.5250, lr_0 = 1.5740e-04
Loss = 3.7341e-01, PNorm = 63.0454, GNorm = 1.7019, lr_0 = 1.5729e-04
Loss = 3.6144e-01, PNorm = 63.0475, GNorm = 1.4416, lr_0 = 1.5718e-04
Loss = 3.1671e-01, PNorm = 63.0502, GNorm = 1.2210, lr_0 = 1.5707e-04
Loss = 3.5990e-01, PNorm = 63.0507, GNorm = 1.5074, lr_0 = 1.5697e-04
Loss = 3.5042e-01, PNorm = 63.0536, GNorm = 1.7940, lr_0 = 1.5686e-04
Loss = 3.6857e-01, PNorm = 63.0565, GNorm = 1.1267, lr_0 = 1.5675e-04
Loss = 3.4191e-01, PNorm = 63.0565, GNorm = 1.4667, lr_0 = 1.5664e-04
Loss = 3.7169e-01, PNorm = 63.0582, GNorm = 1.7128, lr_0 = 1.5654e-04
Loss = 3.5461e-01, PNorm = 63.0626, GNorm = 1.4768, lr_0 = 1.5643e-04
Loss = 3.5909e-01, PNorm = 63.0629, GNorm = 1.7626, lr_0 = 1.5632e-04
Loss = 3.5470e-01, PNorm = 63.0644, GNorm = 1.4250, lr_0 = 1.5621e-04
Loss = 3.3123e-01, PNorm = 63.0672, GNorm = 1.5495, lr_0 = 1.5611e-04
Loss = 3.1624e-01, PNorm = 63.0706, GNorm = 1.2451, lr_0 = 1.5600e-04
Loss = 3.5050e-01, PNorm = 63.0733, GNorm = 1.1704, lr_0 = 1.5589e-04
Loss = 3.3457e-01, PNorm = 63.0746, GNorm = 1.5901, lr_0 = 1.5579e-04
Loss = 2.8320e-01, PNorm = 63.0752, GNorm = 1.1473, lr_0 = 1.5568e-04
Loss = 3.9439e-01, PNorm = 63.0784, GNorm = 1.2231, lr_0 = 1.5557e-04
Loss = 3.3008e-01, PNorm = 63.0777, GNorm = 1.2560, lr_0 = 1.5547e-04
Loss = 3.5858e-01, PNorm = 63.0795, GNorm = 1.5406, lr_0 = 1.5536e-04
Loss = 3.0932e-01, PNorm = 63.0822, GNorm = 1.1279, lr_0 = 1.5525e-04
Loss = 3.6213e-01, PNorm = 63.0851, GNorm = 1.8646, lr_0 = 1.5515e-04
Loss = 3.4375e-01, PNorm = 63.0849, GNorm = 1.3931, lr_0 = 1.5504e-04
Loss = 3.7636e-01, PNorm = 63.0848, GNorm = 1.3076, lr_0 = 1.5493e-04
Loss = 3.4982e-01, PNorm = 63.0868, GNorm = 1.3968, lr_0 = 1.5483e-04
Loss = 3.4151e-01, PNorm = 63.0871, GNorm = 1.5493, lr_0 = 1.5472e-04
Loss = 3.6472e-01, PNorm = 63.0876, GNorm = 1.4081, lr_0 = 1.5462e-04
Loss = 3.5288e-01, PNorm = 63.0896, GNorm = 1.2868, lr_0 = 1.5451e-04
Loss = 3.5062e-01, PNorm = 63.0926, GNorm = 1.7546, lr_0 = 1.5440e-04
Loss = 3.3329e-01, PNorm = 63.0940, GNorm = 1.8122, lr_0 = 1.5430e-04
Loss = 3.5535e-01, PNorm = 63.0943, GNorm = 1.4285, lr_0 = 1.5419e-04
Loss = 4.0432e-01, PNorm = 63.0969, GNorm = 1.4197, lr_0 = 1.5409e-04
Loss = 3.6038e-01, PNorm = 63.0979, GNorm = 1.4965, lr_0 = 1.5398e-04
Loss = 3.6433e-01, PNorm = 63.0988, GNorm = 1.4185, lr_0 = 1.5388e-04
Loss = 3.3327e-01, PNorm = 63.1015, GNorm = 1.1447, lr_0 = 1.5377e-04
Loss = 3.0636e-01, PNorm = 63.1036, GNorm = 1.1209, lr_0 = 1.5367e-04
Loss = 3.7810e-01, PNorm = 63.1038, GNorm = 1.3929, lr_0 = 1.5356e-04
Loss = 3.6283e-01, PNorm = 63.1038, GNorm = 1.3135, lr_0 = 1.5346e-04
Loss = 3.6426e-01, PNorm = 63.1050, GNorm = 1.4080, lr_0 = 1.5335e-04
Loss = 3.9547e-01, PNorm = 63.1046, GNorm = 1.3053, lr_0 = 1.5325e-04
Loss = 3.7030e-01, PNorm = 63.1065, GNorm = 1.9562, lr_0 = 1.5314e-04
Loss = 3.5256e-01, PNorm = 63.1087, GNorm = 1.4410, lr_0 = 1.5304e-04
Loss = 3.6943e-01, PNorm = 63.1090, GNorm = 1.1055, lr_0 = 1.5293e-04
Loss = 3.5901e-01, PNorm = 63.1104, GNorm = 1.3040, lr_0 = 1.5283e-04
Loss = 3.7022e-01, PNorm = 63.1129, GNorm = 1.3184, lr_0 = 1.5272e-04
Loss = 3.8151e-01, PNorm = 63.1145, GNorm = 1.3816, lr_0 = 1.5262e-04
Loss = 3.6048e-01, PNorm = 63.1152, GNorm = 1.1858, lr_0 = 1.5251e-04
Loss = 3.7549e-01, PNorm = 63.1154, GNorm = 1.4719, lr_0 = 1.5241e-04
Loss = 3.4915e-01, PNorm = 63.1177, GNorm = 1.3319, lr_0 = 1.5230e-04
Loss = 3.7385e-01, PNorm = 63.1213, GNorm = 1.7695, lr_0 = 1.5220e-04
Loss = 4.0474e-01, PNorm = 63.1218, GNorm = 1.8991, lr_0 = 1.5209e-04
Loss = 3.4075e-01, PNorm = 63.1210, GNorm = 2.5900, lr_0 = 1.5199e-04
Loss = 3.5504e-01, PNorm = 63.1235, GNorm = 1.2053, lr_0 = 1.5189e-04
Loss = 3.8020e-01, PNorm = 63.1258, GNorm = 2.1128, lr_0 = 1.5178e-04
Loss = 3.4765e-01, PNorm = 63.1272, GNorm = 1.2719, lr_0 = 1.5168e-04
Loss = 3.8002e-01, PNorm = 63.1297, GNorm = 1.2439, lr_0 = 1.5157e-04
Loss = 3.3222e-01, PNorm = 63.1321, GNorm = 1.3200, lr_0 = 1.5147e-04
Loss = 3.1883e-01, PNorm = 63.1323, GNorm = 1.6320, lr_0 = 1.5137e-04
Loss = 3.7049e-01, PNorm = 63.1350, GNorm = 1.6319, lr_0 = 1.5126e-04
Loss = 4.0357e-01, PNorm = 63.1396, GNorm = 1.4905, lr_0 = 1.5116e-04
Loss = 3.8032e-01, PNorm = 63.1441, GNorm = 1.8153, lr_0 = 1.5106e-04
Loss = 3.5285e-01, PNorm = 63.1456, GNorm = 1.8095, lr_0 = 1.5095e-04
Loss = 3.6733e-01, PNorm = 63.1464, GNorm = 1.4402, lr_0 = 1.5085e-04
Validation mae = 0.111669
Epoch 25
Loss = 3.1579e-01, PNorm = 63.1476, GNorm = 1.3256, lr_0 = 1.5075e-04
Loss = 2.9928e-01, PNorm = 63.1487, GNorm = 1.1529, lr_0 = 1.5064e-04
Loss = 3.1071e-01, PNorm = 63.1488, GNorm = 1.1592, lr_0 = 1.5054e-04
Loss = 3.5788e-01, PNorm = 63.1512, GNorm = 1.3132, lr_0 = 1.5044e-04
Loss = 3.1116e-01, PNorm = 63.1537, GNorm = 1.4758, lr_0 = 1.5033e-04
Loss = 3.1961e-01, PNorm = 63.1551, GNorm = 1.4363, lr_0 = 1.5023e-04
Loss = 3.7735e-01, PNorm = 63.1534, GNorm = 1.2749, lr_0 = 1.5013e-04
Loss = 3.2388e-01, PNorm = 63.1532, GNorm = 1.0824, lr_0 = 1.5002e-04
Loss = 3.5497e-01, PNorm = 63.1533, GNorm = 1.2887, lr_0 = 1.4992e-04
Loss = 3.4103e-01, PNorm = 63.1540, GNorm = 1.5404, lr_0 = 1.4982e-04
Loss = 3.3224e-01, PNorm = 63.1561, GNorm = 1.1238, lr_0 = 1.4972e-04
Loss = 3.4731e-01, PNorm = 63.1577, GNorm = 2.0299, lr_0 = 1.4961e-04
Loss = 3.5261e-01, PNorm = 63.1602, GNorm = 1.5372, lr_0 = 1.4951e-04
Loss = 3.9905e-01, PNorm = 63.1634, GNorm = 1.2098, lr_0 = 1.4941e-04
Loss = 3.5329e-01, PNorm = 63.1650, GNorm = 1.3129, lr_0 = 1.4931e-04
Loss = 3.4518e-01, PNorm = 63.1644, GNorm = 1.5351, lr_0 = 1.4920e-04
Loss = 3.4580e-01, PNorm = 63.1652, GNorm = 1.3978, lr_0 = 1.4910e-04
Loss = 3.2914e-01, PNorm = 63.1662, GNorm = 1.3526, lr_0 = 1.4900e-04
Loss = 3.1694e-01, PNorm = 63.1677, GNorm = 1.4264, lr_0 = 1.4890e-04
Loss = 3.9908e-01, PNorm = 63.1713, GNorm = 1.8207, lr_0 = 1.4880e-04
Loss = 3.1825e-01, PNorm = 63.1718, GNorm = 1.5129, lr_0 = 1.4869e-04
Loss = 3.4506e-01, PNorm = 63.1723, GNorm = 1.5613, lr_0 = 1.4859e-04
Loss = 4.0434e-01, PNorm = 63.1774, GNorm = 1.4804, lr_0 = 1.4849e-04
Loss = 3.2075e-01, PNorm = 63.1787, GNorm = 1.5710, lr_0 = 1.4839e-04
Loss = 3.3969e-01, PNorm = 63.1786, GNorm = 1.6729, lr_0 = 1.4829e-04
Loss = 3.5376e-01, PNorm = 63.1803, GNorm = 1.4341, lr_0 = 1.4818e-04
Loss = 3.3066e-01, PNorm = 63.1807, GNorm = 1.8305, lr_0 = 1.4808e-04
Loss = 3.4296e-01, PNorm = 63.1804, GNorm = 1.5810, lr_0 = 1.4798e-04
Loss = 3.7919e-01, PNorm = 63.1828, GNorm = 1.1287, lr_0 = 1.4788e-04
Loss = 3.5498e-01, PNorm = 63.1858, GNorm = 1.0955, lr_0 = 1.4778e-04
Loss = 3.4507e-01, PNorm = 63.1866, GNorm = 1.6442, lr_0 = 1.4768e-04
Loss = 3.1346e-01, PNorm = 63.1883, GNorm = 1.1622, lr_0 = 1.4758e-04
Loss = 3.5797e-01, PNorm = 63.1894, GNorm = 1.3823, lr_0 = 1.4748e-04
Loss = 3.3192e-01, PNorm = 63.1927, GNorm = 1.4167, lr_0 = 1.4737e-04
Loss = 3.3969e-01, PNorm = 63.1947, GNorm = 1.1516, lr_0 = 1.4727e-04
Loss = 3.5538e-01, PNorm = 63.1944, GNorm = 1.3930, lr_0 = 1.4717e-04
Loss = 3.0860e-01, PNorm = 63.1946, GNorm = 1.1291, lr_0 = 1.4707e-04
Loss = 3.5288e-01, PNorm = 63.1953, GNorm = 1.2997, lr_0 = 1.4697e-04
Loss = 3.6777e-01, PNorm = 63.1978, GNorm = 1.5138, lr_0 = 1.4687e-04
Loss = 3.6073e-01, PNorm = 63.2013, GNorm = 1.6320, lr_0 = 1.4677e-04
Loss = 3.6707e-01, PNorm = 63.2032, GNorm = 1.1065, lr_0 = 1.4667e-04
Loss = 3.9393e-01, PNorm = 63.2044, GNorm = 1.5862, lr_0 = 1.4657e-04
Loss = 3.3843e-01, PNorm = 63.2049, GNorm = 1.3646, lr_0 = 1.4647e-04
Loss = 3.3206e-01, PNorm = 63.2083, GNorm = 1.6456, lr_0 = 1.4637e-04
Loss = 3.5141e-01, PNorm = 63.2101, GNorm = 1.3593, lr_0 = 1.4627e-04
Loss = 3.2546e-01, PNorm = 63.2108, GNorm = 1.3176, lr_0 = 1.4617e-04
Loss = 3.7233e-01, PNorm = 63.2134, GNorm = 1.4659, lr_0 = 1.4607e-04
Loss = 3.0405e-01, PNorm = 63.2167, GNorm = 1.3579, lr_0 = 1.4597e-04
Loss = 3.5721e-01, PNorm = 63.2188, GNorm = 1.3956, lr_0 = 1.4587e-04
Loss = 3.7608e-01, PNorm = 63.2186, GNorm = 1.6455, lr_0 = 1.4577e-04
Loss = 4.0420e-01, PNorm = 63.2197, GNorm = 1.7114, lr_0 = 1.4567e-04
Loss = 3.9522e-01, PNorm = 63.2219, GNorm = 1.9337, lr_0 = 1.4557e-04
Loss = 3.6916e-01, PNorm = 63.2214, GNorm = 1.6849, lr_0 = 1.4547e-04
Loss = 3.6206e-01, PNorm = 63.2228, GNorm = 1.8540, lr_0 = 1.4537e-04
Loss = 3.9141e-01, PNorm = 63.2250, GNorm = 1.4179, lr_0 = 1.4527e-04
Loss = 3.9695e-01, PNorm = 63.2245, GNorm = 1.1709, lr_0 = 1.4517e-04
Loss = 3.5494e-01, PNorm = 63.2258, GNorm = 1.4062, lr_0 = 1.4507e-04
Loss = 3.7348e-01, PNorm = 63.2263, GNorm = 1.5177, lr_0 = 1.4497e-04
Loss = 3.3929e-01, PNorm = 63.2258, GNorm = 1.5131, lr_0 = 1.4487e-04
Loss = 3.9262e-01, PNorm = 63.2274, GNorm = 1.9055, lr_0 = 1.4477e-04
Loss = 3.7261e-01, PNorm = 63.2252, GNorm = 1.3019, lr_0 = 1.4467e-04
Loss = 3.4195e-01, PNorm = 63.2278, GNorm = 1.5288, lr_0 = 1.4457e-04
Loss = 3.8180e-01, PNorm = 63.2308, GNorm = 1.4791, lr_0 = 1.4447e-04
Loss = 3.3610e-01, PNorm = 63.2325, GNorm = 1.2808, lr_0 = 1.4438e-04
Loss = 3.4312e-01, PNorm = 63.2344, GNorm = 1.1811, lr_0 = 1.4428e-04
Loss = 3.6142e-01, PNorm = 63.2339, GNorm = 1.3597, lr_0 = 1.4418e-04
Loss = 3.5166e-01, PNorm = 63.2336, GNorm = 1.4564, lr_0 = 1.4408e-04
Loss = 3.7905e-01, PNorm = 63.2362, GNorm = 1.3643, lr_0 = 1.4398e-04
Loss = 3.1477e-01, PNorm = 63.2390, GNorm = 1.3590, lr_0 = 1.4388e-04
Loss = 3.7238e-01, PNorm = 63.2401, GNorm = 1.8573, lr_0 = 1.4378e-04
Loss = 3.2830e-01, PNorm = 63.2411, GNorm = 1.0796, lr_0 = 1.4368e-04
Loss = 3.1624e-01, PNorm = 63.2405, GNorm = 1.8926, lr_0 = 1.4359e-04
Loss = 4.4789e-01, PNorm = 63.2415, GNorm = 1.8655, lr_0 = 1.4349e-04
Loss = 3.0840e-01, PNorm = 63.2415, GNorm = 1.6099, lr_0 = 1.4339e-04
Loss = 3.3868e-01, PNorm = 63.2435, GNorm = 1.5264, lr_0 = 1.4329e-04
Loss = 3.4541e-01, PNorm = 63.2450, GNorm = 1.3350, lr_0 = 1.4319e-04
Loss = 3.3354e-01, PNorm = 63.2470, GNorm = 1.3346, lr_0 = 1.4310e-04
Loss = 3.1624e-01, PNorm = 63.2496, GNorm = 1.5072, lr_0 = 1.4300e-04
Loss = 4.0760e-01, PNorm = 63.2525, GNorm = 2.1277, lr_0 = 1.4290e-04
Loss = 3.1127e-01, PNorm = 63.2543, GNorm = 1.4253, lr_0 = 1.4280e-04
Loss = 3.1876e-01, PNorm = 63.2574, GNorm = 1.1605, lr_0 = 1.4270e-04
Loss = 3.8160e-01, PNorm = 63.2592, GNorm = 1.5953, lr_0 = 1.4261e-04
Loss = 3.6330e-01, PNorm = 63.2586, GNorm = 2.0302, lr_0 = 1.4251e-04
Loss = 3.8974e-01, PNorm = 63.2607, GNorm = 1.3352, lr_0 = 1.4241e-04
Loss = 3.1252e-01, PNorm = 63.2641, GNorm = 1.3576, lr_0 = 1.4231e-04
Loss = 3.6459e-01, PNorm = 63.2649, GNorm = 1.3194, lr_0 = 1.4222e-04
Loss = 3.4249e-01, PNorm = 63.2653, GNorm = 1.3812, lr_0 = 1.4212e-04
Loss = 3.2663e-01, PNorm = 63.2669, GNorm = 1.5264, lr_0 = 1.4202e-04
Loss = 3.7498e-01, PNorm = 63.2657, GNorm = 1.3494, lr_0 = 1.4192e-04
Loss = 3.0364e-01, PNorm = 63.2676, GNorm = 1.1247, lr_0 = 1.4183e-04
Loss = 4.0897e-01, PNorm = 63.2706, GNorm = 1.5703, lr_0 = 1.4173e-04
Loss = 3.8132e-01, PNorm = 63.2721, GNorm = 1.6320, lr_0 = 1.4163e-04
Loss = 3.7530e-01, PNorm = 63.2726, GNorm = 1.4343, lr_0 = 1.4153e-04
Loss = 3.4435e-01, PNorm = 63.2730, GNorm = 1.3713, lr_0 = 1.4144e-04
Loss = 3.5499e-01, PNorm = 63.2743, GNorm = 2.0648, lr_0 = 1.4134e-04
Loss = 3.4352e-01, PNorm = 63.2765, GNorm = 2.1191, lr_0 = 1.4124e-04
Loss = 3.7773e-01, PNorm = 63.2769, GNorm = 1.8709, lr_0 = 1.4115e-04
Loss = 4.0093e-01, PNorm = 63.2784, GNorm = 1.5612, lr_0 = 1.4105e-04
Loss = 3.3729e-01, PNorm = 63.2807, GNorm = 1.5488, lr_0 = 1.4095e-04
Loss = 3.5668e-01, PNorm = 63.2819, GNorm = 1.1670, lr_0 = 1.4086e-04
Loss = 3.2902e-01, PNorm = 63.2824, GNorm = 1.0403, lr_0 = 1.4076e-04
Loss = 3.5671e-01, PNorm = 63.2827, GNorm = 2.2127, lr_0 = 1.4066e-04
Loss = 3.0599e-01, PNorm = 63.2829, GNorm = 1.5184, lr_0 = 1.4057e-04
Loss = 3.2505e-01, PNorm = 63.2843, GNorm = 1.1966, lr_0 = 1.4047e-04
Loss = 3.5759e-01, PNorm = 63.2842, GNorm = 1.8080, lr_0 = 1.4038e-04
Loss = 3.3690e-01, PNorm = 63.2846, GNorm = 1.6877, lr_0 = 1.4028e-04
Loss = 3.1984e-01, PNorm = 63.2843, GNorm = 1.6824, lr_0 = 1.4018e-04
Loss = 4.7299e-01, PNorm = 63.2869, GNorm = 2.3792, lr_0 = 1.4009e-04
Loss = 3.2906e-01, PNorm = 63.2879, GNorm = 1.4148, lr_0 = 1.3999e-04
Loss = 3.3395e-01, PNorm = 63.2874, GNorm = 1.4579, lr_0 = 1.3990e-04
Loss = 3.8458e-01, PNorm = 63.2881, GNorm = 1.5688, lr_0 = 1.3980e-04
Loss = 3.9554e-01, PNorm = 63.2906, GNorm = 1.3905, lr_0 = 1.3970e-04
Loss = 3.6373e-01, PNorm = 63.2920, GNorm = 1.2545, lr_0 = 1.3961e-04
Loss = 3.6640e-01, PNorm = 63.2934, GNorm = 1.2853, lr_0 = 1.3951e-04
Loss = 3.3764e-01, PNorm = 63.2948, GNorm = 1.4792, lr_0 = 1.3942e-04
Loss = 3.5275e-01, PNorm = 63.2943, GNorm = 1.4145, lr_0 = 1.3932e-04
Loss = 3.1876e-01, PNorm = 63.2967, GNorm = 1.6608, lr_0 = 1.3923e-04
Loss = 3.1688e-01, PNorm = 63.2991, GNorm = 1.0440, lr_0 = 1.3913e-04
Loss = 3.3270e-01, PNorm = 63.3001, GNorm = 2.1335, lr_0 = 1.3904e-04
Loss = 3.9583e-01, PNorm = 63.3009, GNorm = 1.2308, lr_0 = 1.3894e-04
Validation mae = 0.112121
Epoch 26
Loss = 3.8709e-01, PNorm = 63.3042, GNorm = 1.4193, lr_0 = 1.3884e-04
Loss = 3.2183e-01, PNorm = 63.3077, GNorm = 1.2597, lr_0 = 1.3875e-04
Loss = 3.3208e-01, PNorm = 63.3100, GNorm = 1.3888, lr_0 = 1.3865e-04
Loss = 3.0057e-01, PNorm = 63.3129, GNorm = 1.1908, lr_0 = 1.3856e-04
Loss = 3.3535e-01, PNorm = 63.3148, GNorm = 1.3018, lr_0 = 1.3846e-04
Loss = 3.0026e-01, PNorm = 63.3165, GNorm = 1.4953, lr_0 = 1.3837e-04
Loss = 3.5709e-01, PNorm = 63.3173, GNorm = 1.6324, lr_0 = 1.3828e-04
Loss = 3.3460e-01, PNorm = 63.3174, GNorm = 0.8888, lr_0 = 1.3818e-04
Loss = 3.3006e-01, PNorm = 63.3185, GNorm = 1.2471, lr_0 = 1.3809e-04
Loss = 3.5000e-01, PNorm = 63.3195, GNorm = 1.4343, lr_0 = 1.3799e-04
Loss = 3.4234e-01, PNorm = 63.3216, GNorm = 1.7215, lr_0 = 1.3790e-04
Loss = 3.1861e-01, PNorm = 63.3259, GNorm = 1.6792, lr_0 = 1.3780e-04
Loss = 3.6149e-01, PNorm = 63.3252, GNorm = 1.5586, lr_0 = 1.3771e-04
Loss = 3.3195e-01, PNorm = 63.3262, GNorm = 1.3135, lr_0 = 1.3761e-04
Loss = 3.8992e-01, PNorm = 63.3260, GNorm = 1.5667, lr_0 = 1.3752e-04
Loss = 3.2410e-01, PNorm = 63.3261, GNorm = 1.1706, lr_0 = 1.3742e-04
Loss = 3.4692e-01, PNorm = 63.3294, GNorm = 2.0420, lr_0 = 1.3733e-04
Loss = 3.2817e-01, PNorm = 63.3288, GNorm = 1.6062, lr_0 = 1.3724e-04
Loss = 3.6138e-01, PNorm = 63.3281, GNorm = 1.5626, lr_0 = 1.3714e-04
Loss = 3.6205e-01, PNorm = 63.3304, GNorm = 1.4143, lr_0 = 1.3705e-04
Loss = 3.8485e-01, PNorm = 63.3326, GNorm = 1.0859, lr_0 = 1.3695e-04
Loss = 3.3614e-01, PNorm = 63.3356, GNorm = 1.4953, lr_0 = 1.3686e-04
Loss = 3.2769e-01, PNorm = 63.3364, GNorm = 1.1401, lr_0 = 1.3677e-04
Loss = 3.5108e-01, PNorm = 63.3362, GNorm = 1.6453, lr_0 = 1.3667e-04
Loss = 3.1900e-01, PNorm = 63.3383, GNorm = 1.2957, lr_0 = 1.3658e-04
Loss = 3.5992e-01, PNorm = 63.3374, GNorm = 1.1373, lr_0 = 1.3649e-04
Loss = 3.3845e-01, PNorm = 63.3370, GNorm = 1.4980, lr_0 = 1.3639e-04
Loss = 4.0203e-01, PNorm = 63.3390, GNorm = 1.5413, lr_0 = 1.3630e-04
Loss = 3.2227e-01, PNorm = 63.3403, GNorm = 1.2590, lr_0 = 1.3621e-04
Loss = 3.1294e-01, PNorm = 63.3409, GNorm = 1.4570, lr_0 = 1.3611e-04
Loss = 3.7120e-01, PNorm = 63.3403, GNorm = 1.2971, lr_0 = 1.3602e-04
Loss = 3.2358e-01, PNorm = 63.3419, GNorm = 1.1100, lr_0 = 1.3593e-04
Loss = 3.5247e-01, PNorm = 63.3434, GNorm = 1.6654, lr_0 = 1.3583e-04
Loss = 4.2757e-01, PNorm = 63.3445, GNorm = 1.4092, lr_0 = 1.3574e-04
Loss = 3.7560e-01, PNorm = 63.3457, GNorm = 1.3450, lr_0 = 1.3565e-04
Loss = 3.7520e-01, PNorm = 63.3465, GNorm = 1.3068, lr_0 = 1.3555e-04
Loss = 3.4121e-01, PNorm = 63.3459, GNorm = 1.9731, lr_0 = 1.3546e-04
Loss = 3.4396e-01, PNorm = 63.3476, GNorm = 1.5572, lr_0 = 1.3537e-04
Loss = 3.7077e-01, PNorm = 63.3489, GNorm = 1.8383, lr_0 = 1.3528e-04
Loss = 3.1451e-01, PNorm = 63.3508, GNorm = 1.8212, lr_0 = 1.3518e-04
Loss = 2.9123e-01, PNorm = 63.3514, GNorm = 1.1507, lr_0 = 1.3509e-04
Loss = 3.5957e-01, PNorm = 63.3503, GNorm = 1.1765, lr_0 = 1.3500e-04
Loss = 3.4236e-01, PNorm = 63.3507, GNorm = 1.3254, lr_0 = 1.3491e-04
Loss = 3.7666e-01, PNorm = 63.3528, GNorm = 1.4895, lr_0 = 1.3481e-04
Loss = 3.6225e-01, PNorm = 63.3554, GNorm = 1.8037, lr_0 = 1.3472e-04
Loss = 3.5857e-01, PNorm = 63.3556, GNorm = 1.5821, lr_0 = 1.3463e-04
Loss = 3.1938e-01, PNorm = 63.3576, GNorm = 1.2601, lr_0 = 1.3454e-04
Loss = 3.6064e-01, PNorm = 63.3571, GNorm = 1.6561, lr_0 = 1.3444e-04
Loss = 3.2770e-01, PNorm = 63.3582, GNorm = 1.4132, lr_0 = 1.3435e-04
Loss = 2.8785e-01, PNorm = 63.3614, GNorm = 1.1377, lr_0 = 1.3426e-04
Loss = 3.4047e-01, PNorm = 63.3629, GNorm = 2.0903, lr_0 = 1.3417e-04
Loss = 3.4715e-01, PNorm = 63.3641, GNorm = 1.4485, lr_0 = 1.3408e-04
Loss = 3.5843e-01, PNorm = 63.3662, GNorm = 1.2506, lr_0 = 1.3398e-04
Loss = 3.6534e-01, PNorm = 63.3653, GNorm = 1.4583, lr_0 = 1.3389e-04
Loss = 3.4800e-01, PNorm = 63.3658, GNorm = 1.6464, lr_0 = 1.3380e-04
Loss = 3.7064e-01, PNorm = 63.3681, GNorm = 1.1783, lr_0 = 1.3371e-04
Loss = 3.8053e-01, PNorm = 63.3697, GNorm = 1.5533, lr_0 = 1.3362e-04
Loss = 3.8675e-01, PNorm = 63.3703, GNorm = 1.3977, lr_0 = 1.3353e-04
Loss = 3.5153e-01, PNorm = 63.3714, GNorm = 1.7753, lr_0 = 1.3343e-04
Loss = 3.1749e-01, PNorm = 63.3713, GNorm = 1.4512, lr_0 = 1.3334e-04
Loss = 3.3169e-01, PNorm = 63.3722, GNorm = 1.7159, lr_0 = 1.3325e-04
Loss = 3.3840e-01, PNorm = 63.3731, GNorm = 1.3867, lr_0 = 1.3316e-04
Loss = 3.2983e-01, PNorm = 63.3751, GNorm = 1.9069, lr_0 = 1.3307e-04
Loss = 3.2111e-01, PNorm = 63.3757, GNorm = 3.1168, lr_0 = 1.3298e-04
Loss = 3.7388e-01, PNorm = 63.3756, GNorm = 1.2310, lr_0 = 1.3289e-04
Loss = 3.1969e-01, PNorm = 63.3762, GNorm = 1.3751, lr_0 = 1.3280e-04
Loss = 3.3052e-01, PNorm = 63.3789, GNorm = 1.3180, lr_0 = 1.3270e-04
Loss = 3.6843e-01, PNorm = 63.3810, GNorm = 1.4044, lr_0 = 1.3261e-04
Loss = 3.4452e-01, PNorm = 63.3825, GNorm = 2.1091, lr_0 = 1.3252e-04
Loss = 3.4564e-01, PNorm = 63.3843, GNorm = 1.0907, lr_0 = 1.3243e-04
Loss = 4.1668e-01, PNorm = 63.3855, GNorm = 2.2819, lr_0 = 1.3234e-04
Loss = 3.8608e-01, PNorm = 63.3895, GNorm = 1.6394, lr_0 = 1.3225e-04
Loss = 3.4126e-01, PNorm = 63.3917, GNorm = 0.9303, lr_0 = 1.3216e-04
Loss = 3.5046e-01, PNorm = 63.3926, GNorm = 1.5280, lr_0 = 1.3207e-04
Loss = 3.3945e-01, PNorm = 63.3936, GNorm = 1.3271, lr_0 = 1.3198e-04
Loss = 3.3341e-01, PNorm = 63.3956, GNorm = 1.7241, lr_0 = 1.3189e-04
Loss = 3.6900e-01, PNorm = 63.3956, GNorm = 1.4951, lr_0 = 1.3180e-04
Loss = 3.9775e-01, PNorm = 63.3973, GNorm = 1.4708, lr_0 = 1.3171e-04
Loss = 3.3837e-01, PNorm = 63.4003, GNorm = 1.4156, lr_0 = 1.3162e-04
Loss = 2.9431e-01, PNorm = 63.4024, GNorm = 1.5100, lr_0 = 1.3153e-04
Loss = 3.2846e-01, PNorm = 63.4036, GNorm = 1.5794, lr_0 = 1.3144e-04
Loss = 4.0461e-01, PNorm = 63.4039, GNorm = 1.9538, lr_0 = 1.3135e-04
Loss = 3.1393e-01, PNorm = 63.4053, GNorm = 1.2703, lr_0 = 1.3126e-04
Loss = 3.5512e-01, PNorm = 63.4070, GNorm = 1.5140, lr_0 = 1.3117e-04
Loss = 3.5428e-01, PNorm = 63.4073, GNorm = 1.2331, lr_0 = 1.3108e-04
Loss = 3.2669e-01, PNorm = 63.4078, GNorm = 0.9741, lr_0 = 1.3099e-04
Loss = 3.3548e-01, PNorm = 63.4083, GNorm = 1.2615, lr_0 = 1.3090e-04
Loss = 3.4958e-01, PNorm = 63.4102, GNorm = 1.3898, lr_0 = 1.3081e-04
Loss = 3.3820e-01, PNorm = 63.4123, GNorm = 1.3218, lr_0 = 1.3072e-04
Loss = 3.2822e-01, PNorm = 63.4119, GNorm = 0.9874, lr_0 = 1.3063e-04
Loss = 3.6549e-01, PNorm = 63.4113, GNorm = 0.9103, lr_0 = 1.3054e-04
Loss = 4.0039e-01, PNorm = 63.4111, GNorm = 1.6911, lr_0 = 1.3045e-04
Loss = 3.2117e-01, PNorm = 63.4113, GNorm = 1.0349, lr_0 = 1.3036e-04
Loss = 4.2539e-01, PNorm = 63.4152, GNorm = 1.3824, lr_0 = 1.3027e-04
Loss = 3.2264e-01, PNorm = 63.4173, GNorm = 1.3091, lr_0 = 1.3018e-04
Loss = 3.4945e-01, PNorm = 63.4183, GNorm = 1.4290, lr_0 = 1.3009e-04
Loss = 3.4974e-01, PNorm = 63.4199, GNorm = 1.2198, lr_0 = 1.3000e-04
Loss = 3.5616e-01, PNorm = 63.4195, GNorm = 1.6874, lr_0 = 1.2992e-04
Loss = 4.0774e-01, PNorm = 63.4207, GNorm = 2.8055, lr_0 = 1.2983e-04
Loss = 3.1321e-01, PNorm = 63.4219, GNorm = 2.0094, lr_0 = 1.2974e-04
Loss = 3.6606e-01, PNorm = 63.4235, GNorm = 1.2068, lr_0 = 1.2965e-04
Loss = 3.4337e-01, PNorm = 63.4252, GNorm = 1.0043, lr_0 = 1.2956e-04
Loss = 3.0546e-01, PNorm = 63.4263, GNorm = 1.1837, lr_0 = 1.2947e-04
Loss = 2.9798e-01, PNorm = 63.4260, GNorm = 1.7807, lr_0 = 1.2938e-04
Loss = 3.4728e-01, PNorm = 63.4260, GNorm = 1.8951, lr_0 = 1.2929e-04
Loss = 3.6242e-01, PNorm = 63.4260, GNorm = 1.3090, lr_0 = 1.2921e-04
Loss = 3.5039e-01, PNorm = 63.4274, GNorm = 1.2991, lr_0 = 1.2912e-04
Loss = 3.7477e-01, PNorm = 63.4297, GNorm = 2.3927, lr_0 = 1.2903e-04
Loss = 3.5198e-01, PNorm = 63.4314, GNorm = 1.9206, lr_0 = 1.2894e-04
Loss = 3.5773e-01, PNorm = 63.4325, GNorm = 1.4602, lr_0 = 1.2885e-04
Loss = 3.3776e-01, PNorm = 63.4332, GNorm = 1.9393, lr_0 = 1.2876e-04
Loss = 3.3464e-01, PNorm = 63.4339, GNorm = 2.7961, lr_0 = 1.2867e-04
Loss = 3.6734e-01, PNorm = 63.4372, GNorm = 2.2529, lr_0 = 1.2859e-04
Loss = 3.7483e-01, PNorm = 63.4387, GNorm = 1.6696, lr_0 = 1.2850e-04
Loss = 3.4541e-01, PNorm = 63.4384, GNorm = 1.0906, lr_0 = 1.2841e-04
Loss = 3.9126e-01, PNorm = 63.4411, GNorm = 1.5047, lr_0 = 1.2832e-04
Loss = 3.8583e-01, PNorm = 63.4417, GNorm = 1.7410, lr_0 = 1.2823e-04
Loss = 3.4290e-01, PNorm = 63.4412, GNorm = 1.4843, lr_0 = 1.2815e-04
Loss = 3.7194e-01, PNorm = 63.4423, GNorm = 1.3662, lr_0 = 1.2806e-04
Loss = 3.4498e-01, PNorm = 63.4455, GNorm = 1.4706, lr_0 = 1.2797e-04
Validation mae = 0.111525
Epoch 27
Loss = 3.5839e-01, PNorm = 63.4454, GNorm = 1.0864, lr_0 = 1.2788e-04
Loss = 3.7480e-01, PNorm = 63.4459, GNorm = 2.4931, lr_0 = 1.2780e-04
Loss = 3.2534e-01, PNorm = 63.4464, GNorm = 1.6537, lr_0 = 1.2771e-04
Loss = 3.3159e-01, PNorm = 63.4481, GNorm = 1.5201, lr_0 = 1.2762e-04
Loss = 3.2323e-01, PNorm = 63.4483, GNorm = 1.7695, lr_0 = 1.2753e-04
Loss = 3.6104e-01, PNorm = 63.4506, GNorm = 1.2745, lr_0 = 1.2745e-04
Loss = 3.2220e-01, PNorm = 63.4531, GNorm = 1.5006, lr_0 = 1.2736e-04
Loss = 3.2935e-01, PNorm = 63.4528, GNorm = 1.2822, lr_0 = 1.2727e-04
Loss = 3.5937e-01, PNorm = 63.4538, GNorm = 1.6173, lr_0 = 1.2718e-04
Loss = 3.6626e-01, PNorm = 63.4553, GNorm = 1.7485, lr_0 = 1.2710e-04
Loss = 3.3574e-01, PNorm = 63.4575, GNorm = 1.2565, lr_0 = 1.2701e-04
Loss = 3.2888e-01, PNorm = 63.4584, GNorm = 1.5291, lr_0 = 1.2692e-04
Loss = 3.3058e-01, PNorm = 63.4592, GNorm = 1.3641, lr_0 = 1.2684e-04
Loss = 3.5882e-01, PNorm = 63.4620, GNorm = 1.3430, lr_0 = 1.2675e-04
Loss = 3.6048e-01, PNorm = 63.4647, GNorm = 2.0134, lr_0 = 1.2666e-04
Loss = 3.5382e-01, PNorm = 63.4663, GNorm = 1.3198, lr_0 = 1.2658e-04
Loss = 3.3502e-01, PNorm = 63.4680, GNorm = 1.4322, lr_0 = 1.2649e-04
Loss = 3.4423e-01, PNorm = 63.4693, GNorm = 1.3950, lr_0 = 1.2640e-04
Loss = 3.2681e-01, PNorm = 63.4686, GNorm = 1.3422, lr_0 = 1.2632e-04
Loss = 3.4928e-01, PNorm = 63.4682, GNorm = 1.3516, lr_0 = 1.2623e-04
Loss = 3.4081e-01, PNorm = 63.4688, GNorm = 1.3376, lr_0 = 1.2614e-04
Loss = 3.0274e-01, PNorm = 63.4685, GNorm = 1.3166, lr_0 = 1.2606e-04
Loss = 3.6207e-01, PNorm = 63.4688, GNorm = 2.0420, lr_0 = 1.2597e-04
Loss = 3.4201e-01, PNorm = 63.4716, GNorm = 1.3044, lr_0 = 1.2588e-04
Loss = 3.4120e-01, PNorm = 63.4716, GNorm = 1.4792, lr_0 = 1.2580e-04
Loss = 3.3268e-01, PNorm = 63.4728, GNorm = 1.4030, lr_0 = 1.2571e-04
Loss = 3.2601e-01, PNorm = 63.4739, GNorm = 1.3501, lr_0 = 1.2563e-04
Loss = 2.9401e-01, PNorm = 63.4739, GNorm = 1.5587, lr_0 = 1.2554e-04
Loss = 2.9904e-01, PNorm = 63.4737, GNorm = 1.2669, lr_0 = 1.2545e-04
Loss = 3.2147e-01, PNorm = 63.4752, GNorm = 1.1509, lr_0 = 1.2537e-04
Loss = 3.8747e-01, PNorm = 63.4774, GNorm = 1.5552, lr_0 = 1.2528e-04
Loss = 4.0551e-01, PNorm = 63.4780, GNorm = 1.8569, lr_0 = 1.2520e-04
Loss = 3.4031e-01, PNorm = 63.4789, GNorm = 1.2407, lr_0 = 1.2511e-04
Loss = 3.3312e-01, PNorm = 63.4801, GNorm = 1.4281, lr_0 = 1.2502e-04
Loss = 3.2256e-01, PNorm = 63.4807, GNorm = 1.2384, lr_0 = 1.2494e-04
Loss = 3.9530e-01, PNorm = 63.4824, GNorm = 2.2816, lr_0 = 1.2485e-04
Loss = 3.7511e-01, PNorm = 63.4849, GNorm = 1.3004, lr_0 = 1.2477e-04
Loss = 3.6806e-01, PNorm = 63.4876, GNorm = 2.2357, lr_0 = 1.2468e-04
Loss = 3.3628e-01, PNorm = 63.4893, GNorm = 1.1723, lr_0 = 1.2460e-04
Loss = 3.3607e-01, PNorm = 63.4909, GNorm = 1.2236, lr_0 = 1.2451e-04
Loss = 3.2737e-01, PNorm = 63.4934, GNorm = 1.6549, lr_0 = 1.2443e-04
Loss = 4.0156e-01, PNorm = 63.4943, GNorm = 1.6268, lr_0 = 1.2434e-04
Loss = 3.9036e-01, PNorm = 63.4942, GNorm = 1.1056, lr_0 = 1.2426e-04
Loss = 4.1758e-01, PNorm = 63.4947, GNorm = 1.5532, lr_0 = 1.2417e-04
Loss = 3.1984e-01, PNorm = 63.4947, GNorm = 1.2985, lr_0 = 1.2409e-04
Loss = 3.3703e-01, PNorm = 63.4954, GNorm = 1.4525, lr_0 = 1.2400e-04
Loss = 3.3477e-01, PNorm = 63.4957, GNorm = 1.5115, lr_0 = 1.2392e-04
Loss = 3.4531e-01, PNorm = 63.4966, GNorm = 1.2760, lr_0 = 1.2383e-04
Loss = 3.4269e-01, PNorm = 63.4992, GNorm = 1.1899, lr_0 = 1.2375e-04
Loss = 3.1634e-01, PNorm = 63.4994, GNorm = 1.6999, lr_0 = 1.2366e-04
Loss = 3.4785e-01, PNorm = 63.4982, GNorm = 1.6427, lr_0 = 1.2358e-04
Loss = 3.7222e-01, PNorm = 63.4997, GNorm = 1.6079, lr_0 = 1.2349e-04
Loss = 3.3797e-01, PNorm = 63.5010, GNorm = 1.1277, lr_0 = 1.2341e-04
Loss = 3.1825e-01, PNorm = 63.5018, GNorm = 1.2320, lr_0 = 1.2332e-04
Loss = 3.3280e-01, PNorm = 63.5039, GNorm = 1.2061, lr_0 = 1.2324e-04
Loss = 3.3271e-01, PNorm = 63.5065, GNorm = 1.7363, lr_0 = 1.2315e-04
Loss = 3.7931e-01, PNorm = 63.5076, GNorm = 1.1547, lr_0 = 1.2307e-04
Loss = 3.3304e-01, PNorm = 63.5079, GNorm = 1.2426, lr_0 = 1.2298e-04
Loss = 3.3573e-01, PNorm = 63.5087, GNorm = 1.4178, lr_0 = 1.2290e-04
Loss = 3.5813e-01, PNorm = 63.5099, GNorm = 1.6227, lr_0 = 1.2282e-04
Loss = 3.7480e-01, PNorm = 63.5110, GNorm = 1.8061, lr_0 = 1.2273e-04
Loss = 3.5178e-01, PNorm = 63.5117, GNorm = 1.2548, lr_0 = 1.2265e-04
Loss = 3.5893e-01, PNorm = 63.5137, GNorm = 1.7541, lr_0 = 1.2256e-04
Loss = 3.5047e-01, PNorm = 63.5144, GNorm = 1.3285, lr_0 = 1.2248e-04
Loss = 3.3378e-01, PNorm = 63.5140, GNorm = 1.3776, lr_0 = 1.2240e-04
Loss = 3.5356e-01, PNorm = 63.5146, GNorm = 1.4828, lr_0 = 1.2231e-04
Loss = 3.8929e-01, PNorm = 63.5151, GNorm = 2.0169, lr_0 = 1.2223e-04
Loss = 3.1735e-01, PNorm = 63.5165, GNorm = 1.5369, lr_0 = 1.2214e-04
Loss = 3.8352e-01, PNorm = 63.5186, GNorm = 1.0790, lr_0 = 1.2206e-04
Loss = 3.6304e-01, PNorm = 63.5208, GNorm = 1.2998, lr_0 = 1.2198e-04
Loss = 3.4549e-01, PNorm = 63.5215, GNorm = 1.7646, lr_0 = 1.2189e-04
Loss = 3.2191e-01, PNorm = 63.5213, GNorm = 0.9991, lr_0 = 1.2181e-04
Loss = 3.2772e-01, PNorm = 63.5225, GNorm = 1.8461, lr_0 = 1.2173e-04
Loss = 3.2157e-01, PNorm = 63.5259, GNorm = 1.7516, lr_0 = 1.2164e-04
Loss = 3.7205e-01, PNorm = 63.5280, GNorm = 1.5294, lr_0 = 1.2156e-04
Loss = 3.5826e-01, PNorm = 63.5278, GNorm = 1.3151, lr_0 = 1.2148e-04
Loss = 3.6475e-01, PNorm = 63.5291, GNorm = 1.4781, lr_0 = 1.2139e-04
Loss = 3.6628e-01, PNorm = 63.5309, GNorm = 1.6662, lr_0 = 1.2131e-04
Loss = 3.7538e-01, PNorm = 63.5332, GNorm = 2.2279, lr_0 = 1.2123e-04
Loss = 3.2367e-01, PNorm = 63.5354, GNorm = 1.5228, lr_0 = 1.2114e-04
Loss = 3.1943e-01, PNorm = 63.5370, GNorm = 1.2181, lr_0 = 1.2106e-04
Loss = 3.3759e-01, PNorm = 63.5377, GNorm = 1.6196, lr_0 = 1.2098e-04
Loss = 3.1899e-01, PNorm = 63.5391, GNorm = 1.3303, lr_0 = 1.2090e-04
Loss = 3.6430e-01, PNorm = 63.5400, GNorm = 1.7004, lr_0 = 1.2081e-04
Loss = 3.2422e-01, PNorm = 63.5398, GNorm = 1.0988, lr_0 = 1.2073e-04
Loss = 3.5960e-01, PNorm = 63.5385, GNorm = 1.2747, lr_0 = 1.2065e-04
Loss = 3.4930e-01, PNorm = 63.5386, GNorm = 1.4330, lr_0 = 1.2056e-04
Loss = 3.3783e-01, PNorm = 63.5397, GNorm = 1.3685, lr_0 = 1.2048e-04
Loss = 3.8258e-01, PNorm = 63.5403, GNorm = 1.5963, lr_0 = 1.2040e-04
Loss = 3.7301e-01, PNorm = 63.5424, GNorm = 1.7827, lr_0 = 1.2032e-04
Loss = 3.7268e-01, PNorm = 63.5448, GNorm = 1.6334, lr_0 = 1.2023e-04
Loss = 3.1352e-01, PNorm = 63.5454, GNorm = 1.5860, lr_0 = 1.2015e-04
Loss = 3.5060e-01, PNorm = 63.5450, GNorm = 1.3928, lr_0 = 1.2007e-04
Loss = 2.8549e-01, PNorm = 63.5469, GNorm = 1.6157, lr_0 = 1.1999e-04
Loss = 3.3941e-01, PNorm = 63.5500, GNorm = 1.6773, lr_0 = 1.1991e-04
Loss = 3.1274e-01, PNorm = 63.5512, GNorm = 1.7487, lr_0 = 1.1982e-04
Loss = 3.3203e-01, PNorm = 63.5503, GNorm = 1.2425, lr_0 = 1.1974e-04
Loss = 3.9718e-01, PNorm = 63.5515, GNorm = 1.2283, lr_0 = 1.1966e-04
Loss = 3.5375e-01, PNorm = 63.5548, GNorm = 1.5520, lr_0 = 1.1958e-04
Loss = 3.5822e-01, PNorm = 63.5563, GNorm = 1.6176, lr_0 = 1.1950e-04
Loss = 4.1856e-01, PNorm = 63.5568, GNorm = 1.3449, lr_0 = 1.1941e-04
Loss = 3.7526e-01, PNorm = 63.5596, GNorm = 1.6383, lr_0 = 1.1933e-04
Loss = 3.5474e-01, PNorm = 63.5624, GNorm = 1.3679, lr_0 = 1.1925e-04
Loss = 3.4471e-01, PNorm = 63.5664, GNorm = 2.0537, lr_0 = 1.1917e-04
Loss = 3.1514e-01, PNorm = 63.5678, GNorm = 2.1052, lr_0 = 1.1909e-04
Loss = 3.2045e-01, PNorm = 63.5682, GNorm = 1.3840, lr_0 = 1.1901e-04
Loss = 3.2263e-01, PNorm = 63.5672, GNorm = 1.6247, lr_0 = 1.1892e-04
Loss = 3.4276e-01, PNorm = 63.5683, GNorm = 2.2145, lr_0 = 1.1884e-04
Loss = 3.4754e-01, PNorm = 63.5698, GNorm = 1.5195, lr_0 = 1.1876e-04
Loss = 3.2062e-01, PNorm = 63.5701, GNorm = 1.4751, lr_0 = 1.1868e-04
Loss = 3.7723e-01, PNorm = 63.5695, GNorm = 1.4429, lr_0 = 1.1860e-04
Loss = 3.1105e-01, PNorm = 63.5697, GNorm = 1.5832, lr_0 = 1.1852e-04
Loss = 3.3900e-01, PNorm = 63.5699, GNorm = 1.2549, lr_0 = 1.1844e-04
Loss = 4.0865e-01, PNorm = 63.5704, GNorm = 1.5973, lr_0 = 1.1835e-04
Loss = 3.6800e-01, PNorm = 63.5719, GNorm = 1.4928, lr_0 = 1.1827e-04
Loss = 3.2806e-01, PNorm = 63.5739, GNorm = 0.8850, lr_0 = 1.1819e-04
Loss = 3.2085e-01, PNorm = 63.5740, GNorm = 1.4720, lr_0 = 1.1811e-04
Loss = 4.2138e-01, PNorm = 63.5729, GNorm = 1.6759, lr_0 = 1.1803e-04
Loss = 3.9554e-01, PNorm = 63.5726, GNorm = 1.6707, lr_0 = 1.1795e-04
Loss = 3.2963e-01, PNorm = 63.5733, GNorm = 1.6178, lr_0 = 1.1787e-04
Validation mae = 0.111610
Epoch 28
Loss = 3.6048e-01, PNorm = 63.5745, GNorm = 1.6804, lr_0 = 1.1779e-04
Loss = 3.6038e-01, PNorm = 63.5751, GNorm = 1.9918, lr_0 = 1.1771e-04
Loss = 3.7697e-01, PNorm = 63.5782, GNorm = 1.4104, lr_0 = 1.1763e-04
Loss = 3.3953e-01, PNorm = 63.5801, GNorm = 2.1027, lr_0 = 1.1755e-04
Loss = 3.9559e-01, PNorm = 63.5811, GNorm = 1.4467, lr_0 = 1.1747e-04
Loss = 3.6604e-01, PNorm = 63.5810, GNorm = 1.5966, lr_0 = 1.1739e-04
Loss = 3.6957e-01, PNorm = 63.5818, GNorm = 1.5014, lr_0 = 1.1730e-04
Loss = 3.6414e-01, PNorm = 63.5836, GNorm = 1.1508, lr_0 = 1.1722e-04
Loss = 3.0903e-01, PNorm = 63.5844, GNorm = 1.3236, lr_0 = 1.1714e-04
Loss = 3.4282e-01, PNorm = 63.5857, GNorm = 1.5249, lr_0 = 1.1706e-04
Loss = 3.9646e-01, PNorm = 63.5856, GNorm = 1.1275, lr_0 = 1.1698e-04
Loss = 3.0803e-01, PNorm = 63.5861, GNorm = 1.9268, lr_0 = 1.1690e-04
Loss = 3.1756e-01, PNorm = 63.5890, GNorm = 1.4135, lr_0 = 1.1682e-04
Loss = 3.5098e-01, PNorm = 63.5897, GNorm = 2.3511, lr_0 = 1.1674e-04
Loss = 3.2957e-01, PNorm = 63.5891, GNorm = 1.3229, lr_0 = 1.1666e-04
Loss = 3.4028e-01, PNorm = 63.5902, GNorm = 1.3943, lr_0 = 1.1658e-04
Loss = 3.2282e-01, PNorm = 63.5920, GNorm = 2.0164, lr_0 = 1.1650e-04
Loss = 2.9181e-01, PNorm = 63.5943, GNorm = 1.1297, lr_0 = 1.1642e-04
Loss = 3.4787e-01, PNorm = 63.5964, GNorm = 1.1423, lr_0 = 1.1634e-04
Loss = 3.2347e-01, PNorm = 63.5970, GNorm = 1.5158, lr_0 = 1.1626e-04
Loss = 3.0456e-01, PNorm = 63.5987, GNorm = 0.9817, lr_0 = 1.1618e-04
Loss = 4.1301e-01, PNorm = 63.6014, GNorm = 1.8712, lr_0 = 1.1611e-04
Loss = 3.4334e-01, PNorm = 63.6026, GNorm = 1.5693, lr_0 = 1.1603e-04
Loss = 3.4560e-01, PNorm = 63.6026, GNorm = 1.2474, lr_0 = 1.1595e-04
Loss = 3.2441e-01, PNorm = 63.6031, GNorm = 1.8011, lr_0 = 1.1587e-04
Loss = 3.9109e-01, PNorm = 63.6029, GNorm = 2.0472, lr_0 = 1.1579e-04
Loss = 3.1878e-01, PNorm = 63.6030, GNorm = 1.8921, lr_0 = 1.1571e-04
Loss = 3.9566e-01, PNorm = 63.6044, GNorm = 1.8274, lr_0 = 1.1563e-04
Loss = 3.3563e-01, PNorm = 63.6059, GNorm = 1.6938, lr_0 = 1.1555e-04
Loss = 3.5163e-01, PNorm = 63.6075, GNorm = 1.5753, lr_0 = 1.1547e-04
Loss = 3.3671e-01, PNorm = 63.6090, GNorm = 1.0440, lr_0 = 1.1539e-04
Loss = 3.1101e-01, PNorm = 63.6090, GNorm = 1.7711, lr_0 = 1.1531e-04
Loss = 3.2478e-01, PNorm = 63.6092, GNorm = 1.5212, lr_0 = 1.1523e-04
Loss = 3.0406e-01, PNorm = 63.6103, GNorm = 1.5041, lr_0 = 1.1515e-04
Loss = 2.9052e-01, PNorm = 63.6108, GNorm = 1.4448, lr_0 = 1.1508e-04
Loss = 3.1375e-01, PNorm = 63.6111, GNorm = 1.3460, lr_0 = 1.1500e-04
Loss = 3.6208e-01, PNorm = 63.6113, GNorm = 1.2022, lr_0 = 1.1492e-04
Loss = 3.2918e-01, PNorm = 63.6129, GNorm = 2.3053, lr_0 = 1.1484e-04
Loss = 3.6736e-01, PNorm = 63.6143, GNorm = 1.9576, lr_0 = 1.1476e-04
Loss = 3.4691e-01, PNorm = 63.6142, GNorm = 1.5975, lr_0 = 1.1468e-04
Loss = 2.9842e-01, PNorm = 63.6148, GNorm = 0.8882, lr_0 = 1.1460e-04
Loss = 3.3559e-01, PNorm = 63.6149, GNorm = 1.5515, lr_0 = 1.1452e-04
Loss = 3.3677e-01, PNorm = 63.6171, GNorm = 1.7659, lr_0 = 1.1445e-04
Loss = 3.5808e-01, PNorm = 63.6183, GNorm = 1.0665, lr_0 = 1.1437e-04
Loss = 3.5740e-01, PNorm = 63.6196, GNorm = 1.7015, lr_0 = 1.1429e-04
Loss = 3.8373e-01, PNorm = 63.6197, GNorm = 1.9328, lr_0 = 1.1421e-04
Loss = 3.3765e-01, PNorm = 63.6181, GNorm = 1.8645, lr_0 = 1.1413e-04
Loss = 2.8323e-01, PNorm = 63.6172, GNorm = 1.1015, lr_0 = 1.1405e-04
Loss = 3.6362e-01, PNorm = 63.6198, GNorm = 1.5218, lr_0 = 1.1398e-04
Loss = 3.0861e-01, PNorm = 63.6216, GNorm = 1.6858, lr_0 = 1.1390e-04
Loss = 3.8161e-01, PNorm = 63.6219, GNorm = 1.6098, lr_0 = 1.1382e-04
Loss = 3.3028e-01, PNorm = 63.6206, GNorm = 1.4732, lr_0 = 1.1374e-04
Loss = 3.7248e-01, PNorm = 63.6223, GNorm = 1.3946, lr_0 = 1.1366e-04
Loss = 3.6628e-01, PNorm = 63.6225, GNorm = 1.5534, lr_0 = 1.1359e-04
Loss = 3.3844e-01, PNorm = 63.6233, GNorm = 1.7592, lr_0 = 1.1351e-04
Loss = 3.6960e-01, PNorm = 63.6257, GNorm = 2.0235, lr_0 = 1.1343e-04
Loss = 3.8606e-01, PNorm = 63.6274, GNorm = 2.7477, lr_0 = 1.1335e-04
Loss = 3.6067e-01, PNorm = 63.6307, GNorm = 1.3548, lr_0 = 1.1328e-04
Loss = 3.5687e-01, PNorm = 63.6318, GNorm = 1.9361, lr_0 = 1.1320e-04
Loss = 4.0742e-01, PNorm = 63.6299, GNorm = 1.4190, lr_0 = 1.1312e-04
Loss = 3.3580e-01, PNorm = 63.6322, GNorm = 1.8446, lr_0 = 1.1304e-04
Loss = 3.2732e-01, PNorm = 63.6328, GNorm = 1.2962, lr_0 = 1.1297e-04
Loss = 3.1482e-01, PNorm = 63.6319, GNorm = 1.6641, lr_0 = 1.1289e-04
Loss = 3.1209e-01, PNorm = 63.6327, GNorm = 1.4572, lr_0 = 1.1281e-04
Loss = 3.3214e-01, PNorm = 63.6348, GNorm = 2.0121, lr_0 = 1.1273e-04
Loss = 3.6854e-01, PNorm = 63.6342, GNorm = 1.0657, lr_0 = 1.1266e-04
Loss = 3.6523e-01, PNorm = 63.6355, GNorm = 1.5451, lr_0 = 1.1258e-04
Loss = 3.1144e-01, PNorm = 63.6362, GNorm = 1.2101, lr_0 = 1.1250e-04
Loss = 3.4419e-01, PNorm = 63.6370, GNorm = 2.1054, lr_0 = 1.1243e-04
Loss = 3.3806e-01, PNorm = 63.6367, GNorm = 0.8225, lr_0 = 1.1235e-04
Loss = 3.7269e-01, PNorm = 63.6380, GNorm = 1.6915, lr_0 = 1.1227e-04
Loss = 3.4705e-01, PNorm = 63.6398, GNorm = 1.3678, lr_0 = 1.1219e-04
Loss = 3.2596e-01, PNorm = 63.6407, GNorm = 1.2659, lr_0 = 1.1212e-04
Loss = 3.4129e-01, PNorm = 63.6415, GNorm = 1.7746, lr_0 = 1.1204e-04
Loss = 3.4163e-01, PNorm = 63.6420, GNorm = 1.9307, lr_0 = 1.1196e-04
Loss = 3.3924e-01, PNorm = 63.6429, GNorm = 1.2362, lr_0 = 1.1189e-04
Loss = 3.9858e-01, PNorm = 63.6443, GNorm = 1.8296, lr_0 = 1.1181e-04
Loss = 3.3772e-01, PNorm = 63.6453, GNorm = 1.3334, lr_0 = 1.1173e-04
Loss = 3.2960e-01, PNorm = 63.6455, GNorm = 1.4880, lr_0 = 1.1166e-04
Loss = 3.4105e-01, PNorm = 63.6455, GNorm = 2.0167, lr_0 = 1.1158e-04
Loss = 3.7425e-01, PNorm = 63.6453, GNorm = 1.8277, lr_0 = 1.1150e-04
Loss = 3.6139e-01, PNorm = 63.6473, GNorm = 1.1493, lr_0 = 1.1143e-04
Loss = 3.4030e-01, PNorm = 63.6492, GNorm = 1.3885, lr_0 = 1.1135e-04
Loss = 3.2499e-01, PNorm = 63.6494, GNorm = 1.3050, lr_0 = 1.1128e-04
Loss = 3.8123e-01, PNorm = 63.6486, GNorm = 1.4913, lr_0 = 1.1120e-04
Loss = 3.5450e-01, PNorm = 63.6501, GNorm = 1.2299, lr_0 = 1.1112e-04
Loss = 3.8112e-01, PNorm = 63.6518, GNorm = 2.3313, lr_0 = 1.1105e-04
Loss = 3.2738e-01, PNorm = 63.6532, GNorm = 1.4432, lr_0 = 1.1097e-04
Loss = 3.1629e-01, PNorm = 63.6547, GNorm = 1.2808, lr_0 = 1.1089e-04
Loss = 3.5120e-01, PNorm = 63.6556, GNorm = 2.2165, lr_0 = 1.1082e-04
Loss = 3.0631e-01, PNorm = 63.6565, GNorm = 1.1094, lr_0 = 1.1074e-04
Loss = 3.4251e-01, PNorm = 63.6565, GNorm = 1.3794, lr_0 = 1.1067e-04
Loss = 4.1778e-01, PNorm = 63.6563, GNorm = 1.8421, lr_0 = 1.1059e-04
Loss = 3.4008e-01, PNorm = 63.6595, GNorm = 1.4217, lr_0 = 1.1052e-04
Loss = 3.4073e-01, PNorm = 63.6607, GNorm = 1.5485, lr_0 = 1.1044e-04
Loss = 3.3727e-01, PNorm = 63.6595, GNorm = 1.4601, lr_0 = 1.1036e-04
Loss = 3.7134e-01, PNorm = 63.6588, GNorm = 1.2144, lr_0 = 1.1029e-04
Loss = 3.2839e-01, PNorm = 63.6585, GNorm = 1.2858, lr_0 = 1.1021e-04
Loss = 3.6715e-01, PNorm = 63.6591, GNorm = 1.2564, lr_0 = 1.1014e-04
Loss = 3.4144e-01, PNorm = 63.6605, GNorm = 1.6393, lr_0 = 1.1006e-04
Loss = 3.7228e-01, PNorm = 63.6593, GNorm = 1.7902, lr_0 = 1.0999e-04
Loss = 3.4095e-01, PNorm = 63.6593, GNorm = 1.2682, lr_0 = 1.0991e-04
Loss = 3.5050e-01, PNorm = 63.6611, GNorm = 0.9784, lr_0 = 1.0984e-04
Loss = 3.1658e-01, PNorm = 63.6620, GNorm = 1.1700, lr_0 = 1.0976e-04
Loss = 2.9943e-01, PNorm = 63.6638, GNorm = 1.1809, lr_0 = 1.0969e-04
Loss = 3.1447e-01, PNorm = 63.6641, GNorm = 1.3483, lr_0 = 1.0961e-04
Loss = 3.4997e-01, PNorm = 63.6642, GNorm = 1.1822, lr_0 = 1.0954e-04
Loss = 3.4077e-01, PNorm = 63.6662, GNorm = 1.7108, lr_0 = 1.0946e-04
Loss = 3.3125e-01, PNorm = 63.6683, GNorm = 1.2005, lr_0 = 1.0939e-04
Loss = 3.4664e-01, PNorm = 63.6691, GNorm = 1.7563, lr_0 = 1.0931e-04
Loss = 3.5056e-01, PNorm = 63.6695, GNorm = 1.4539, lr_0 = 1.0924e-04
Loss = 3.6525e-01, PNorm = 63.6704, GNorm = 1.9654, lr_0 = 1.0916e-04
Loss = 3.3168e-01, PNorm = 63.6724, GNorm = 1.9158, lr_0 = 1.0909e-04
Loss = 3.2529e-01, PNorm = 63.6729, GNorm = 1.4586, lr_0 = 1.0901e-04
Loss = 3.6831e-01, PNorm = 63.6745, GNorm = 1.8334, lr_0 = 1.0894e-04
Loss = 3.9118e-01, PNorm = 63.6756, GNorm = 1.2561, lr_0 = 1.0886e-04
Loss = 3.4266e-01, PNorm = 63.6768, GNorm = 1.1634, lr_0 = 1.0879e-04
Loss = 3.5059e-01, PNorm = 63.6779, GNorm = 1.6021, lr_0 = 1.0871e-04
Loss = 3.6114e-01, PNorm = 63.6804, GNorm = 1.3798, lr_0 = 1.0864e-04
Loss = 3.0345e-01, PNorm = 63.6825, GNorm = 1.3686, lr_0 = 1.0856e-04
Validation mae = 0.110736
Epoch 29
Loss = 3.9300e-01, PNorm = 63.6830, GNorm = 1.5411, lr_0 = 1.0849e-04
Loss = 3.0801e-01, PNorm = 63.6833, GNorm = 1.6224, lr_0 = 1.0841e-04
Loss = 2.7830e-01, PNorm = 63.6839, GNorm = 1.5424, lr_0 = 1.0834e-04
Loss = 3.6979e-01, PNorm = 63.6846, GNorm = 1.3554, lr_0 = 1.0827e-04
Loss = 3.1977e-01, PNorm = 63.6867, GNorm = 1.2647, lr_0 = 1.0819e-04
Loss = 3.3764e-01, PNorm = 63.6882, GNorm = 1.4143, lr_0 = 1.0812e-04
Loss = 3.8031e-01, PNorm = 63.6882, GNorm = 1.7318, lr_0 = 1.0804e-04
Loss = 3.6044e-01, PNorm = 63.6888, GNorm = 1.2485, lr_0 = 1.0797e-04
Loss = 3.5375e-01, PNorm = 63.6896, GNorm = 1.2329, lr_0 = 1.0790e-04
Loss = 3.5554e-01, PNorm = 63.6899, GNorm = 1.0161, lr_0 = 1.0782e-04
Loss = 3.9717e-01, PNorm = 63.6894, GNorm = 1.4454, lr_0 = 1.0775e-04
Loss = 3.1229e-01, PNorm = 63.6909, GNorm = 1.7746, lr_0 = 1.0767e-04
Loss = 3.8139e-01, PNorm = 63.6927, GNorm = 1.3224, lr_0 = 1.0760e-04
Loss = 3.2238e-01, PNorm = 63.6924, GNorm = 1.4215, lr_0 = 1.0753e-04
Loss = 3.4238e-01, PNorm = 63.6929, GNorm = 1.6885, lr_0 = 1.0745e-04
Loss = 3.3339e-01, PNorm = 63.6928, GNorm = 1.6517, lr_0 = 1.0738e-04
Loss = 4.2641e-01, PNorm = 63.6940, GNorm = 2.1766, lr_0 = 1.0731e-04
Loss = 3.3362e-01, PNorm = 63.6963, GNorm = 2.2298, lr_0 = 1.0723e-04
Loss = 3.5944e-01, PNorm = 63.6974, GNorm = 1.6598, lr_0 = 1.0716e-04
Loss = 3.7507e-01, PNorm = 63.6995, GNorm = 1.4975, lr_0 = 1.0709e-04
Loss = 3.6867e-01, PNorm = 63.7010, GNorm = 1.1869, lr_0 = 1.0701e-04
Loss = 3.7983e-01, PNorm = 63.7024, GNorm = 1.9159, lr_0 = 1.0694e-04
Loss = 3.6043e-01, PNorm = 63.7029, GNorm = 1.6744, lr_0 = 1.0687e-04
Loss = 3.3796e-01, PNorm = 63.7031, GNorm = 1.6124, lr_0 = 1.0679e-04
Loss = 3.1076e-01, PNorm = 63.7048, GNorm = 1.0468, lr_0 = 1.0672e-04
Loss = 3.3538e-01, PNorm = 63.7052, GNorm = 1.4199, lr_0 = 1.0665e-04
Loss = 3.7912e-01, PNorm = 63.7059, GNorm = 2.1149, lr_0 = 1.0657e-04
Loss = 3.2845e-01, PNorm = 63.7077, GNorm = 1.4336, lr_0 = 1.0650e-04
Loss = 3.4096e-01, PNorm = 63.7088, GNorm = 1.6192, lr_0 = 1.0643e-04
Loss = 3.5913e-01, PNorm = 63.7097, GNorm = 1.4531, lr_0 = 1.0635e-04
Loss = 3.6943e-01, PNorm = 63.7093, GNorm = 1.8071, lr_0 = 1.0628e-04
Loss = 3.0097e-01, PNorm = 63.7091, GNorm = 1.3076, lr_0 = 1.0621e-04
Loss = 3.2297e-01, PNorm = 63.7101, GNorm = 1.9161, lr_0 = 1.0614e-04
Loss = 3.3714e-01, PNorm = 63.7100, GNorm = 1.2066, lr_0 = 1.0606e-04
Loss = 3.3473e-01, PNorm = 63.7116, GNorm = 2.0231, lr_0 = 1.0599e-04
Loss = 3.1761e-01, PNorm = 63.7150, GNorm = 1.3108, lr_0 = 1.0592e-04
Loss = 3.0538e-01, PNorm = 63.7146, GNorm = 2.0588, lr_0 = 1.0585e-04
Loss = 3.0810e-01, PNorm = 63.7147, GNorm = 1.6625, lr_0 = 1.0577e-04
Loss = 2.9701e-01, PNorm = 63.7152, GNorm = 1.3365, lr_0 = 1.0570e-04
Loss = 3.4605e-01, PNorm = 63.7162, GNorm = 2.6600, lr_0 = 1.0563e-04
Loss = 3.0704e-01, PNorm = 63.7181, GNorm = 1.4199, lr_0 = 1.0556e-04
Loss = 3.4977e-01, PNorm = 63.7186, GNorm = 1.5024, lr_0 = 1.0548e-04
Loss = 3.6492e-01, PNorm = 63.7193, GNorm = 1.2158, lr_0 = 1.0541e-04
Loss = 3.2690e-01, PNorm = 63.7208, GNorm = 1.3102, lr_0 = 1.0534e-04
Loss = 3.2666e-01, PNorm = 63.7220, GNorm = 1.4244, lr_0 = 1.0527e-04
Loss = 3.1438e-01, PNorm = 63.7213, GNorm = 1.3182, lr_0 = 1.0519e-04
Loss = 3.6235e-01, PNorm = 63.7215, GNorm = 1.1227, lr_0 = 1.0512e-04
Loss = 3.2617e-01, PNorm = 63.7222, GNorm = 1.1712, lr_0 = 1.0505e-04
Loss = 3.0566e-01, PNorm = 63.7244, GNorm = 1.3808, lr_0 = 1.0498e-04
Loss = 3.4020e-01, PNorm = 63.7261, GNorm = 1.2196, lr_0 = 1.0491e-04
Loss = 3.5359e-01, PNorm = 63.7273, GNorm = 2.1477, lr_0 = 1.0483e-04
Loss = 3.1681e-01, PNorm = 63.7285, GNorm = 1.2687, lr_0 = 1.0476e-04
Loss = 3.2829e-01, PNorm = 63.7291, GNorm = 1.4711, lr_0 = 1.0469e-04
Loss = 4.0038e-01, PNorm = 63.7291, GNorm = 1.3965, lr_0 = 1.0462e-04
Loss = 3.5294e-01, PNorm = 63.7297, GNorm = 1.2081, lr_0 = 1.0455e-04
Loss = 3.4306e-01, PNorm = 63.7304, GNorm = 1.3166, lr_0 = 1.0448e-04
Loss = 3.0365e-01, PNorm = 63.7307, GNorm = 1.5641, lr_0 = 1.0440e-04
Loss = 3.4247e-01, PNorm = 63.7303, GNorm = 1.9820, lr_0 = 1.0433e-04
Loss = 3.3813e-01, PNorm = 63.7313, GNorm = 1.6215, lr_0 = 1.0426e-04
Loss = 3.6947e-01, PNorm = 63.7325, GNorm = 1.3954, lr_0 = 1.0419e-04
Loss = 3.1859e-01, PNorm = 63.7330, GNorm = 1.7618, lr_0 = 1.0412e-04
Loss = 3.6325e-01, PNorm = 63.7352, GNorm = 1.7341, lr_0 = 1.0405e-04
Loss = 3.4086e-01, PNorm = 63.7370, GNorm = 1.4109, lr_0 = 1.0398e-04
Loss = 3.2682e-01, PNorm = 63.7392, GNorm = 1.3879, lr_0 = 1.0391e-04
Loss = 3.5272e-01, PNorm = 63.7408, GNorm = 1.9961, lr_0 = 1.0383e-04
Loss = 3.4518e-01, PNorm = 63.7419, GNorm = 1.4564, lr_0 = 1.0376e-04
Loss = 3.8294e-01, PNorm = 63.7414, GNorm = 1.4499, lr_0 = 1.0369e-04
Loss = 3.4376e-01, PNorm = 63.7420, GNorm = 1.5971, lr_0 = 1.0362e-04
Loss = 3.3344e-01, PNorm = 63.7434, GNorm = 1.8420, lr_0 = 1.0355e-04
Loss = 3.0692e-01, PNorm = 63.7447, GNorm = 1.3525, lr_0 = 1.0348e-04
Loss = 3.7113e-01, PNorm = 63.7457, GNorm = 1.7596, lr_0 = 1.0341e-04
Loss = 3.6006e-01, PNorm = 63.7463, GNorm = 1.1614, lr_0 = 1.0334e-04
Loss = 3.5719e-01, PNorm = 63.7456, GNorm = 1.1227, lr_0 = 1.0327e-04
Loss = 3.2065e-01, PNorm = 63.7457, GNorm = 1.2827, lr_0 = 1.0320e-04
Loss = 3.5084e-01, PNorm = 63.7462, GNorm = 1.1137, lr_0 = 1.0312e-04
Loss = 4.6462e-01, PNorm = 63.7474, GNorm = 2.9912, lr_0 = 1.0305e-04
Loss = 3.2659e-01, PNorm = 63.7486, GNorm = 1.0828, lr_0 = 1.0298e-04
Loss = 3.4150e-01, PNorm = 63.7504, GNorm = 1.3372, lr_0 = 1.0291e-04
Loss = 3.2609e-01, PNorm = 63.7510, GNorm = 1.0825, lr_0 = 1.0284e-04
Loss = 3.7328e-01, PNorm = 63.7513, GNorm = 1.8819, lr_0 = 1.0277e-04
Loss = 2.9053e-01, PNorm = 63.7521, GNorm = 1.7414, lr_0 = 1.0270e-04
Loss = 3.6190e-01, PNorm = 63.7538, GNorm = 1.4236, lr_0 = 1.0263e-04
Loss = 3.2468e-01, PNorm = 63.7540, GNorm = 1.5463, lr_0 = 1.0256e-04
Loss = 3.3740e-01, PNorm = 63.7553, GNorm = 1.1353, lr_0 = 1.0249e-04
Loss = 3.6023e-01, PNorm = 63.7565, GNorm = 1.4768, lr_0 = 1.0242e-04
Loss = 3.1601e-01, PNorm = 63.7572, GNorm = 1.4050, lr_0 = 1.0235e-04
Loss = 3.2886e-01, PNorm = 63.7572, GNorm = 1.7591, lr_0 = 1.0228e-04
Loss = 3.5627e-01, PNorm = 63.7587, GNorm = 1.3254, lr_0 = 1.0221e-04
Loss = 3.5585e-01, PNorm = 63.7578, GNorm = 2.4710, lr_0 = 1.0214e-04
Loss = 3.4852e-01, PNorm = 63.7582, GNorm = 1.4577, lr_0 = 1.0207e-04
Loss = 3.2806e-01, PNorm = 63.7600, GNorm = 0.9480, lr_0 = 1.0200e-04
Loss = 3.2771e-01, PNorm = 63.7622, GNorm = 1.2275, lr_0 = 1.0193e-04
Loss = 3.5302e-01, PNorm = 63.7641, GNorm = 1.1543, lr_0 = 1.0186e-04
Loss = 3.6818e-01, PNorm = 63.7641, GNorm = 1.3479, lr_0 = 1.0179e-04
Loss = 3.2743e-01, PNorm = 63.7639, GNorm = 1.1298, lr_0 = 1.0172e-04
Loss = 3.1911e-01, PNorm = 63.7631, GNorm = 1.6587, lr_0 = 1.0165e-04
Loss = 3.7182e-01, PNorm = 63.7628, GNorm = 1.2590, lr_0 = 1.0158e-04
Loss = 3.7093e-01, PNorm = 63.7641, GNorm = 1.8419, lr_0 = 1.0151e-04
Loss = 3.1001e-01, PNorm = 63.7653, GNorm = 1.8314, lr_0 = 1.0144e-04
Loss = 3.6979e-01, PNorm = 63.7665, GNorm = 1.6135, lr_0 = 1.0137e-04
Loss = 3.4999e-01, PNorm = 63.7666, GNorm = 1.3632, lr_0 = 1.0130e-04
Loss = 3.7526e-01, PNorm = 63.7678, GNorm = 1.6235, lr_0 = 1.0123e-04
Loss = 3.2997e-01, PNorm = 63.7689, GNorm = 1.3703, lr_0 = 1.0116e-04
Loss = 3.3659e-01, PNorm = 63.7692, GNorm = 1.6732, lr_0 = 1.0110e-04
Loss = 3.1150e-01, PNorm = 63.7696, GNorm = 1.3750, lr_0 = 1.0103e-04
Loss = 3.7471e-01, PNorm = 63.7707, GNorm = 1.5454, lr_0 = 1.0096e-04
Loss = 3.2206e-01, PNorm = 63.7715, GNorm = 1.8068, lr_0 = 1.0089e-04
Loss = 3.7783e-01, PNorm = 63.7725, GNorm = 1.4015, lr_0 = 1.0082e-04
Loss = 3.4642e-01, PNorm = 63.7734, GNorm = 1.7196, lr_0 = 1.0075e-04
Loss = 3.4723e-01, PNorm = 63.7734, GNorm = 1.2950, lr_0 = 1.0068e-04
Loss = 3.2078e-01, PNorm = 63.7746, GNorm = 1.5639, lr_0 = 1.0061e-04
Loss = 3.3253e-01, PNorm = 63.7773, GNorm = 1.4208, lr_0 = 1.0054e-04
Loss = 3.7518e-01, PNorm = 63.7784, GNorm = 1.8594, lr_0 = 1.0047e-04
Loss = 3.0414e-01, PNorm = 63.7791, GNorm = 1.5140, lr_0 = 1.0041e-04
Loss = 3.0285e-01, PNorm = 63.7782, GNorm = 1.9717, lr_0 = 1.0034e-04
Loss = 3.4696e-01, PNorm = 63.7770, GNorm = 1.5335, lr_0 = 1.0027e-04
Loss = 3.1695e-01, PNorm = 63.7777, GNorm = 1.4345, lr_0 = 1.0020e-04
Loss = 3.5642e-01, PNorm = 63.7791, GNorm = 1.7978, lr_0 = 1.0013e-04
Loss = 3.5766e-01, PNorm = 63.7793, GNorm = 1.4675, lr_0 = 1.0006e-04
Loss = 4.0052e-01, PNorm = 63.7799, GNorm = 1.4299, lr_0 = 1.0000e-04
Validation mae = 0.110796
Model 0 best validation mae = 0.110736 on epoch 28
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110959
Ensemble test mae = 0.110959
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 9.8317e-01, PNorm = 38.1709, GNorm = 3.4202, lr_0 = 1.0413e-04
Loss = 8.8341e-01, PNorm = 38.1706, GNorm = 3.7110, lr_0 = 1.0788e-04
Loss = 9.8448e-01, PNorm = 38.1707, GNorm = 4.1624, lr_0 = 1.1163e-04
Loss = 1.0428e+00, PNorm = 38.1711, GNorm = 3.0002, lr_0 = 1.1537e-04
Loss = 9.5582e-01, PNorm = 38.1719, GNorm = 2.6358, lr_0 = 1.1913e-04
Loss = 9.2394e-01, PNorm = 38.1725, GNorm = 2.2849, lr_0 = 1.2287e-04
Loss = 9.5693e-01, PNorm = 38.1733, GNorm = 2.6613, lr_0 = 1.2663e-04
Loss = 9.4733e-01, PNorm = 38.1745, GNorm = 2.3924, lr_0 = 1.3038e-04
Loss = 9.0703e-01, PNorm = 38.1757, GNorm = 1.9789, lr_0 = 1.3413e-04
Loss = 9.1617e-01, PNorm = 38.1756, GNorm = 1.9406, lr_0 = 1.3788e-04
Loss = 9.5551e-01, PNorm = 38.1762, GNorm = 1.9960, lr_0 = 1.4163e-04
Loss = 9.3980e-01, PNorm = 38.1779, GNorm = 5.3925, lr_0 = 1.4537e-04
Loss = 9.0252e-01, PNorm = 38.1798, GNorm = 1.6102, lr_0 = 1.4913e-04
Loss = 9.6295e-01, PNorm = 38.1820, GNorm = 2.0075, lr_0 = 1.5288e-04
Loss = 7.8152e-01, PNorm = 38.1845, GNorm = 2.6108, lr_0 = 1.5662e-04
Loss = 7.8207e-01, PNorm = 38.1866, GNorm = 2.7521, lr_0 = 1.6038e-04
Loss = 7.9740e-01, PNorm = 38.1887, GNorm = 3.5093, lr_0 = 1.6412e-04
Loss = 8.2521e-01, PNorm = 38.1912, GNorm = 4.7398, lr_0 = 1.6788e-04
Loss = 8.2898e-01, PNorm = 38.1938, GNorm = 2.3739, lr_0 = 1.7163e-04
Loss = 7.8585e-01, PNorm = 38.1967, GNorm = 3.5733, lr_0 = 1.7538e-04
Loss = 6.6464e-01, PNorm = 38.1995, GNorm = 2.2038, lr_0 = 1.7913e-04
Loss = 8.6944e-01, PNorm = 38.2036, GNorm = 2.3174, lr_0 = 1.8288e-04
Loss = 8.3024e-01, PNorm = 38.2066, GNorm = 4.7630, lr_0 = 1.8662e-04
Loss = 7.5766e-01, PNorm = 38.2095, GNorm = 4.0837, lr_0 = 1.9038e-04
Loss = 7.5086e-01, PNorm = 38.2121, GNorm = 1.7676, lr_0 = 1.9413e-04
Loss = 8.0670e-01, PNorm = 38.2157, GNorm = 4.4895, lr_0 = 1.9788e-04
Loss = 7.5802e-01, PNorm = 38.2180, GNorm = 5.6915, lr_0 = 2.0163e-04
Loss = 7.1402e-01, PNorm = 38.2218, GNorm = 6.9308, lr_0 = 2.0537e-04
Loss = 7.1588e-01, PNorm = 38.2254, GNorm = 6.2166, lr_0 = 2.0913e-04
Loss = 7.8695e-01, PNorm = 38.2274, GNorm = 9.1163, lr_0 = 2.1288e-04
Loss = 7.7679e-01, PNorm = 38.2275, GNorm = 5.4949, lr_0 = 2.1663e-04
Loss = 7.6694e-01, PNorm = 38.2297, GNorm = 4.7412, lr_0 = 2.2038e-04
Loss = 7.6546e-01, PNorm = 38.2325, GNorm = 2.0599, lr_0 = 2.2412e-04
Loss = 7.9101e-01, PNorm = 38.2364, GNorm = 2.7435, lr_0 = 2.2787e-04
Loss = 8.5779e-01, PNorm = 38.2416, GNorm = 1.5634, lr_0 = 2.3163e-04
Loss = 6.7004e-01, PNorm = 38.2470, GNorm = 5.0284, lr_0 = 2.3538e-04
Loss = 7.2520e-01, PNorm = 38.2511, GNorm = 4.7403, lr_0 = 2.3913e-04
Loss = 6.4393e-01, PNorm = 38.2533, GNorm = 1.8027, lr_0 = 2.4288e-04
Loss = 6.9327e-01, PNorm = 38.2543, GNorm = 4.7973, lr_0 = 2.4662e-04
Loss = 6.6494e-01, PNorm = 38.2569, GNorm = 3.2136, lr_0 = 2.5038e-04
Loss = 6.9419e-01, PNorm = 38.2618, GNorm = 5.7112, lr_0 = 2.5413e-04
Loss = 7.4459e-01, PNorm = 38.2638, GNorm = 2.8727, lr_0 = 2.5788e-04
Loss = 6.8726e-01, PNorm = 38.2671, GNorm = 3.5125, lr_0 = 2.6163e-04
Loss = 6.6886e-01, PNorm = 38.2696, GNorm = 6.2009, lr_0 = 2.6537e-04
Loss = 6.5162e-01, PNorm = 38.2708, GNorm = 3.5439, lr_0 = 2.6912e-04
Loss = 6.7118e-01, PNorm = 38.2743, GNorm = 4.9721, lr_0 = 2.7288e-04
Loss = 7.2158e-01, PNorm = 38.2781, GNorm = 3.5199, lr_0 = 2.7663e-04
Loss = 7.2915e-01, PNorm = 38.2819, GNorm = 3.0221, lr_0 = 2.8038e-04
Loss = 7.0345e-01, PNorm = 38.2836, GNorm = 2.7082, lr_0 = 2.8413e-04
Loss = 6.3402e-01, PNorm = 38.2875, GNorm = 1.5162, lr_0 = 2.8787e-04
Loss = 6.6799e-01, PNorm = 38.2939, GNorm = 1.4798, lr_0 = 2.9163e-04
Loss = 6.0595e-01, PNorm = 38.2975, GNorm = 2.6470, lr_0 = 2.9538e-04
Loss = 7.6379e-01, PNorm = 38.2999, GNorm = 1.9141, lr_0 = 2.9913e-04
Loss = 7.1510e-01, PNorm = 38.3048, GNorm = 4.4985, lr_0 = 3.0288e-04
Loss = 7.3658e-01, PNorm = 38.3086, GNorm = 8.2551, lr_0 = 3.0662e-04
Loss = 7.3017e-01, PNorm = 38.3120, GNorm = 1.3462, lr_0 = 3.1037e-04
Loss = 6.9656e-01, PNorm = 38.3191, GNorm = 6.7756, lr_0 = 3.1413e-04
Loss = 5.9471e-01, PNorm = 38.3227, GNorm = 4.1004, lr_0 = 3.1788e-04
Loss = 6.6407e-01, PNorm = 38.3282, GNorm = 2.2386, lr_0 = 3.2163e-04
Loss = 7.1424e-01, PNorm = 38.3305, GNorm = 5.0947, lr_0 = 3.2538e-04
Loss = 7.0372e-01, PNorm = 38.3338, GNorm = 1.8306, lr_0 = 3.2912e-04
Loss = 7.5670e-01, PNorm = 38.3398, GNorm = 10.2705, lr_0 = 3.3288e-04
Loss = 5.8170e-01, PNorm = 38.3436, GNorm = 4.1156, lr_0 = 3.3663e-04
Loss = 6.9749e-01, PNorm = 38.3480, GNorm = 5.8096, lr_0 = 3.4038e-04
Loss = 6.6473e-01, PNorm = 38.3519, GNorm = 4.3149, lr_0 = 3.4413e-04
Loss = 7.6612e-01, PNorm = 38.3576, GNorm = 3.8622, lr_0 = 3.4787e-04
Loss = 6.6704e-01, PNorm = 38.3626, GNorm = 2.4388, lr_0 = 3.5162e-04
Loss = 6.9142e-01, PNorm = 38.3719, GNorm = 6.5742, lr_0 = 3.5538e-04
Loss = 6.6146e-01, PNorm = 38.3738, GNorm = 3.8000, lr_0 = 3.5913e-04
Loss = 6.8054e-01, PNorm = 38.3799, GNorm = 2.5862, lr_0 = 3.6288e-04
Loss = 7.9291e-01, PNorm = 38.3865, GNorm = 12.9057, lr_0 = 3.6662e-04
Loss = 7.0608e-01, PNorm = 38.3916, GNorm = 3.1786, lr_0 = 3.7037e-04
Loss = 6.7004e-01, PNorm = 38.4002, GNorm = 5.3135, lr_0 = 3.7413e-04
Loss = 6.5460e-01, PNorm = 38.4046, GNorm = 2.0993, lr_0 = 3.7788e-04
Loss = 6.0367e-01, PNorm = 38.4100, GNorm = 9.9175, lr_0 = 3.8163e-04
Loss = 6.8009e-01, PNorm = 38.4171, GNorm = 2.7916, lr_0 = 3.8537e-04
Loss = 7.2678e-01, PNorm = 38.4235, GNorm = 7.4267, lr_0 = 3.8912e-04
Loss = 6.3728e-01, PNorm = 38.4306, GNorm = 3.8668, lr_0 = 3.9287e-04
Loss = 6.3499e-01, PNorm = 38.4344, GNorm = 3.0958, lr_0 = 3.9663e-04
Loss = 7.5507e-01, PNorm = 38.4420, GNorm = 18.0240, lr_0 = 4.0038e-04
Loss = 7.0341e-01, PNorm = 38.4446, GNorm = 3.0217, lr_0 = 4.0413e-04
Loss = 6.5510e-01, PNorm = 38.4558, GNorm = 3.8381, lr_0 = 4.0787e-04
Loss = 6.8454e-01, PNorm = 38.4650, GNorm = 1.9211, lr_0 = 4.1162e-04
Loss = 7.1725e-01, PNorm = 38.4688, GNorm = 1.8718, lr_0 = 4.1537e-04
Loss = 5.9796e-01, PNorm = 38.4768, GNorm = 3.7491, lr_0 = 4.1913e-04
Loss = 6.0559e-01, PNorm = 38.4837, GNorm = 3.0663, lr_0 = 4.2288e-04
Loss = 6.9522e-01, PNorm = 38.4868, GNorm = 1.8965, lr_0 = 4.2662e-04
Loss = 7.8683e-01, PNorm = 38.4958, GNorm = 4.9253, lr_0 = 4.3037e-04
Loss = 7.3249e-01, PNorm = 38.5060, GNorm = 8.0892, lr_0 = 4.3412e-04
Loss = 6.1960e-01, PNorm = 38.5133, GNorm = 2.8638, lr_0 = 4.3788e-04
Loss = 6.4398e-01, PNorm = 38.5172, GNorm = 4.0356, lr_0 = 4.4163e-04
Loss = 6.4807e-01, PNorm = 38.5246, GNorm = 2.1985, lr_0 = 4.4538e-04
Loss = 6.6222e-01, PNorm = 38.5327, GNorm = 3.9542, lr_0 = 4.4912e-04
Loss = 7.4400e-01, PNorm = 38.5354, GNorm = 3.1205, lr_0 = 4.5287e-04
Loss = 7.2130e-01, PNorm = 38.5433, GNorm = 0.9941, lr_0 = 4.5662e-04
Loss = 6.6999e-01, PNorm = 38.5592, GNorm = 5.4557, lr_0 = 4.6038e-04
Loss = 6.5960e-01, PNorm = 38.5680, GNorm = 4.3706, lr_0 = 4.6413e-04
Loss = 6.2404e-01, PNorm = 38.5749, GNorm = 4.7201, lr_0 = 4.6787e-04
Loss = 6.3411e-01, PNorm = 38.5808, GNorm = 2.2435, lr_0 = 4.7162e-04
Loss = 6.0975e-01, PNorm = 38.5878, GNorm = 3.9840, lr_0 = 4.7537e-04
Loss = 7.6308e-01, PNorm = 38.5950, GNorm = 1.0050, lr_0 = 4.7913e-04
Loss = 6.6085e-01, PNorm = 38.6082, GNorm = 3.2920, lr_0 = 4.8288e-04
Loss = 6.2291e-01, PNorm = 38.6139, GNorm = 1.3937, lr_0 = 4.8663e-04
Loss = 5.8817e-01, PNorm = 38.6219, GNorm = 2.6915, lr_0 = 4.9038e-04
Loss = 5.3897e-01, PNorm = 38.6286, GNorm = 3.2148, lr_0 = 4.9412e-04
Loss = 5.6643e-01, PNorm = 38.6360, GNorm = 3.6328, lr_0 = 4.9788e-04
Loss = 6.5759e-01, PNorm = 38.6450, GNorm = 4.6379, lr_0 = 5.0163e-04
Loss = 6.4935e-01, PNorm = 38.6537, GNorm = 1.7673, lr_0 = 5.0538e-04
Loss = 6.7966e-01, PNorm = 38.6630, GNorm = 1.8423, lr_0 = 5.0913e-04
Loss = 6.0900e-01, PNorm = 38.6750, GNorm = 1.1187, lr_0 = 5.1287e-04
Loss = 6.3123e-01, PNorm = 38.6814, GNorm = 1.7842, lr_0 = 5.1663e-04
Loss = 7.0830e-01, PNorm = 38.6859, GNorm = 7.0114, lr_0 = 5.2038e-04
Loss = 6.1112e-01, PNorm = 38.6934, GNorm = 2.7806, lr_0 = 5.2413e-04
Loss = 6.1380e-01, PNorm = 38.7053, GNorm = 4.2653, lr_0 = 5.2788e-04
Loss = 6.8903e-01, PNorm = 38.7119, GNorm = 1.1764, lr_0 = 5.3162e-04
Loss = 6.1048e-01, PNorm = 38.7226, GNorm = 1.5898, lr_0 = 5.3538e-04
Loss = 5.9605e-01, PNorm = 38.7337, GNorm = 3.8467, lr_0 = 5.3912e-04
Loss = 6.0869e-01, PNorm = 38.7403, GNorm = 2.2083, lr_0 = 5.4288e-04
Loss = 5.9524e-01, PNorm = 38.7497, GNorm = 5.9195, lr_0 = 5.4663e-04
Loss = 6.3027e-01, PNorm = 38.7606, GNorm = 1.6443, lr_0 = 5.5038e-04
Validation mae = 0.138883
Epoch 1
Loss = 6.1718e-01, PNorm = 38.7719, GNorm = 2.0918, lr_0 = 5.5413e-04
Loss = 7.1613e-01, PNorm = 38.7806, GNorm = 1.6457, lr_0 = 5.5787e-04
Loss = 5.8829e-01, PNorm = 38.7909, GNorm = 2.0299, lr_0 = 5.6163e-04
Loss = 6.1745e-01, PNorm = 38.8018, GNorm = 4.9859, lr_0 = 5.6538e-04
Loss = 7.2599e-01, PNorm = 38.8111, GNorm = 1.5289, lr_0 = 5.6913e-04
Loss = 6.3100e-01, PNorm = 38.8231, GNorm = 3.4254, lr_0 = 5.7288e-04
Loss = 6.3145e-01, PNorm = 38.8380, GNorm = 4.4981, lr_0 = 5.7662e-04
Loss = 5.8561e-01, PNorm = 38.8483, GNorm = 1.3996, lr_0 = 5.8038e-04
Loss = 6.2546e-01, PNorm = 38.8571, GNorm = 10.8586, lr_0 = 5.8413e-04
Loss = 5.5201e-01, PNorm = 38.8650, GNorm = 3.6876, lr_0 = 5.8788e-04
Loss = 6.4993e-01, PNorm = 38.8725, GNorm = 2.0657, lr_0 = 5.9163e-04
Loss = 5.7172e-01, PNorm = 38.8841, GNorm = 3.1028, lr_0 = 5.9538e-04
Loss = 5.8012e-01, PNorm = 38.8956, GNorm = 5.6773, lr_0 = 5.9913e-04
Loss = 6.3051e-01, PNorm = 38.9100, GNorm = 3.8665, lr_0 = 6.0288e-04
Loss = 6.1042e-01, PNorm = 38.9239, GNorm = 1.8295, lr_0 = 6.0663e-04
Loss = 6.2693e-01, PNorm = 38.9336, GNorm = 6.9704, lr_0 = 6.1038e-04
Loss = 6.1599e-01, PNorm = 38.9443, GNorm = 3.2963, lr_0 = 6.1413e-04
Loss = 6.1852e-01, PNorm = 38.9510, GNorm = 1.5134, lr_0 = 6.1788e-04
Loss = 6.4137e-01, PNorm = 38.9607, GNorm = 1.2456, lr_0 = 6.2163e-04
Loss = 6.1149e-01, PNorm = 38.9739, GNorm = 2.7420, lr_0 = 6.2538e-04
Loss = 5.9950e-01, PNorm = 38.9830, GNorm = 7.4085, lr_0 = 6.2913e-04
Loss = 6.1786e-01, PNorm = 38.9917, GNorm = 4.9191, lr_0 = 6.3288e-04
Loss = 5.7208e-01, PNorm = 39.0041, GNorm = 1.4030, lr_0 = 6.3663e-04
Loss = 6.3316e-01, PNorm = 39.0155, GNorm = 4.8205, lr_0 = 6.4038e-04
Loss = 6.0369e-01, PNorm = 39.0287, GNorm = 2.8757, lr_0 = 6.4413e-04
Loss = 5.6026e-01, PNorm = 39.0431, GNorm = 2.2250, lr_0 = 6.4788e-04
Loss = 5.9945e-01, PNorm = 39.0593, GNorm = 2.2661, lr_0 = 6.5163e-04
Loss = 5.6188e-01, PNorm = 39.0722, GNorm = 5.2782, lr_0 = 6.5538e-04
Loss = 6.6544e-01, PNorm = 39.0850, GNorm = 1.4856, lr_0 = 6.5913e-04
Loss = 5.8296e-01, PNorm = 39.1004, GNorm = 1.2284, lr_0 = 6.6288e-04
Loss = 5.7246e-01, PNorm = 39.1100, GNorm = 1.3582, lr_0 = 6.6663e-04
Loss = 5.5072e-01, PNorm = 39.1228, GNorm = 1.0600, lr_0 = 6.7038e-04
Loss = 4.9567e-01, PNorm = 39.1379, GNorm = 4.9730, lr_0 = 6.7413e-04
Loss = 5.7648e-01, PNorm = 39.1499, GNorm = 2.7971, lr_0 = 6.7788e-04
Loss = 6.2698e-01, PNorm = 39.1657, GNorm = 3.3982, lr_0 = 6.8163e-04
Loss = 6.2659e-01, PNorm = 39.1781, GNorm = 1.6241, lr_0 = 6.8538e-04
Loss = 6.1413e-01, PNorm = 39.1922, GNorm = 1.8794, lr_0 = 6.8913e-04
Loss = 6.6683e-01, PNorm = 39.2116, GNorm = 1.3451, lr_0 = 6.9288e-04
Loss = 5.7611e-01, PNorm = 39.2249, GNorm = 3.2426, lr_0 = 6.9663e-04
Loss = 5.8050e-01, PNorm = 39.2489, GNorm = 1.9248, lr_0 = 7.0038e-04
Loss = 6.7321e-01, PNorm = 39.2612, GNorm = 1.5766, lr_0 = 7.0413e-04
Loss = 6.0004e-01, PNorm = 39.2749, GNorm = 5.2701, lr_0 = 7.0788e-04
Loss = 6.3165e-01, PNorm = 39.2829, GNorm = 1.5250, lr_0 = 7.1163e-04
Loss = 5.2588e-01, PNorm = 39.2989, GNorm = 1.0496, lr_0 = 7.1538e-04
Loss = 6.0890e-01, PNorm = 39.3138, GNorm = 2.3038, lr_0 = 7.1913e-04
Loss = 6.5910e-01, PNorm = 39.3285, GNorm = 2.0144, lr_0 = 7.2288e-04
Loss = 6.5961e-01, PNorm = 39.3433, GNorm = 2.7717, lr_0 = 7.2663e-04
Loss = 5.1707e-01, PNorm = 39.3603, GNorm = 2.6487, lr_0 = 7.3038e-04
Loss = 5.6472e-01, PNorm = 39.3723, GNorm = 4.5948, lr_0 = 7.3413e-04
Loss = 6.1488e-01, PNorm = 39.3806, GNorm = 4.7392, lr_0 = 7.3788e-04
Loss = 5.0560e-01, PNorm = 39.3933, GNorm = 1.2782, lr_0 = 7.4163e-04
Loss = 5.6781e-01, PNorm = 39.4142, GNorm = 2.7093, lr_0 = 7.4538e-04
Loss = 5.0877e-01, PNorm = 39.4365, GNorm = 2.1621, lr_0 = 7.4913e-04
Loss = 6.1147e-01, PNorm = 39.4502, GNorm = 1.9415, lr_0 = 7.5288e-04
Loss = 6.2728e-01, PNorm = 39.4558, GNorm = 2.6732, lr_0 = 7.5663e-04
Loss = 6.3299e-01, PNorm = 39.4695, GNorm = 1.0958, lr_0 = 7.6038e-04
Loss = 5.7244e-01, PNorm = 39.4850, GNorm = 1.2989, lr_0 = 7.6413e-04
Loss = 5.5942e-01, PNorm = 39.5041, GNorm = 0.9338, lr_0 = 7.6788e-04
Loss = 5.9343e-01, PNorm = 39.5234, GNorm = 2.4973, lr_0 = 7.7163e-04
Loss = 5.7997e-01, PNorm = 39.5379, GNorm = 1.4388, lr_0 = 7.7538e-04
Loss = 6.3384e-01, PNorm = 39.5547, GNorm = 2.6105, lr_0 = 7.7913e-04
Loss = 6.7955e-01, PNorm = 39.5675, GNorm = 3.8330, lr_0 = 7.8288e-04
Loss = 5.4880e-01, PNorm = 39.5822, GNorm = 1.2594, lr_0 = 7.8663e-04
Loss = 5.8136e-01, PNorm = 39.5983, GNorm = 3.2040, lr_0 = 7.9038e-04
Loss = 5.3099e-01, PNorm = 39.6127, GNorm = 4.4906, lr_0 = 7.9413e-04
Loss = 5.4170e-01, PNorm = 39.6237, GNorm = 4.6437, lr_0 = 7.9788e-04
Loss = 6.2169e-01, PNorm = 39.6431, GNorm = 4.5403, lr_0 = 8.0163e-04
Loss = 5.6478e-01, PNorm = 39.6497, GNorm = 1.3050, lr_0 = 8.0538e-04
Loss = 6.4083e-01, PNorm = 39.6639, GNorm = 1.4497, lr_0 = 8.0913e-04
Loss = 5.8671e-01, PNorm = 39.6813, GNorm = 1.2880, lr_0 = 8.1288e-04
Loss = 6.1917e-01, PNorm = 39.6964, GNorm = 2.3768, lr_0 = 8.1663e-04
Loss = 6.3392e-01, PNorm = 39.7098, GNorm = 3.8087, lr_0 = 8.2038e-04
Loss = 6.2780e-01, PNorm = 39.7256, GNorm = 1.2237, lr_0 = 8.2413e-04
Loss = 5.4436e-01, PNorm = 39.7395, GNorm = 0.9960, lr_0 = 8.2788e-04
Loss = 6.0500e-01, PNorm = 39.7589, GNorm = 1.7718, lr_0 = 8.3163e-04
Loss = 6.2928e-01, PNorm = 39.7846, GNorm = 1.8311, lr_0 = 8.3538e-04
Loss = 5.2199e-01, PNorm = 39.7909, GNorm = 1.0852, lr_0 = 8.3913e-04
Loss = 6.1968e-01, PNorm = 39.8026, GNorm = 3.6688, lr_0 = 8.4288e-04
Loss = 5.9417e-01, PNorm = 39.8300, GNorm = 3.1530, lr_0 = 8.4663e-04
Loss = 5.0858e-01, PNorm = 39.8515, GNorm = 1.2708, lr_0 = 8.5038e-04
Loss = 7.2793e-01, PNorm = 39.8748, GNorm = 2.2993, lr_0 = 8.5413e-04
Loss = 5.5122e-01, PNorm = 39.8924, GNorm = 4.9685, lr_0 = 8.5788e-04
Loss = 5.7807e-01, PNorm = 39.9132, GNorm = 3.2507, lr_0 = 8.6163e-04
Loss = 5.6329e-01, PNorm = 39.9270, GNorm = 1.4473, lr_0 = 8.6538e-04
Loss = 5.5226e-01, PNorm = 39.9479, GNorm = 2.6600, lr_0 = 8.6913e-04
Loss = 5.7494e-01, PNorm = 39.9665, GNorm = 7.0050, lr_0 = 8.7288e-04
Loss = 5.6658e-01, PNorm = 39.9829, GNorm = 1.3610, lr_0 = 8.7663e-04
Loss = 5.8719e-01, PNorm = 40.0112, GNorm = 9.7131, lr_0 = 8.8038e-04
Loss = 6.4502e-01, PNorm = 40.0345, GNorm = 3.4559, lr_0 = 8.8413e-04
Loss = 6.2076e-01, PNorm = 40.0558, GNorm = 2.8925, lr_0 = 8.8788e-04
Loss = 6.1322e-01, PNorm = 40.0803, GNorm = 2.0725, lr_0 = 8.9163e-04
Loss = 6.7785e-01, PNorm = 40.0978, GNorm = 4.0758, lr_0 = 8.9538e-04
Loss = 5.6999e-01, PNorm = 40.1141, GNorm = 5.3669, lr_0 = 8.9913e-04
Loss = 5.4713e-01, PNorm = 40.1227, GNorm = 0.9902, lr_0 = 9.0288e-04
Loss = 5.6029e-01, PNorm = 40.1311, GNorm = 2.4054, lr_0 = 9.0663e-04
Loss = 6.4713e-01, PNorm = 40.1484, GNorm = 1.5728, lr_0 = 9.1038e-04
Loss = 5.9176e-01, PNorm = 40.1628, GNorm = 2.8618, lr_0 = 9.1413e-04
Loss = 5.0563e-01, PNorm = 40.1708, GNorm = 2.8910, lr_0 = 9.1788e-04
Loss = 5.5168e-01, PNorm = 40.1867, GNorm = 1.8722, lr_0 = 9.2163e-04
Loss = 5.5582e-01, PNorm = 40.2022, GNorm = 1.2690, lr_0 = 9.2538e-04
Loss = 5.5030e-01, PNorm = 40.2267, GNorm = 2.7244, lr_0 = 9.2913e-04
Loss = 5.8927e-01, PNorm = 40.2478, GNorm = 1.4481, lr_0 = 9.3288e-04
Loss = 5.5244e-01, PNorm = 40.2692, GNorm = 1.5763, lr_0 = 9.3663e-04
Loss = 6.6842e-01, PNorm = 40.2869, GNorm = 1.4469, lr_0 = 9.4038e-04
Loss = 5.1343e-01, PNorm = 40.3125, GNorm = 1.3384, lr_0 = 9.4413e-04
Loss = 4.9334e-01, PNorm = 40.3382, GNorm = 2.4180, lr_0 = 9.4788e-04
Loss = 5.6375e-01, PNorm = 40.3595, GNorm = 1.5599, lr_0 = 9.5163e-04
Loss = 5.4214e-01, PNorm = 40.3664, GNorm = 1.0126, lr_0 = 9.5538e-04
Loss = 5.2139e-01, PNorm = 40.3754, GNorm = 1.3513, lr_0 = 9.5913e-04
Loss = 5.0486e-01, PNorm = 40.3948, GNorm = 2.1592, lr_0 = 9.6288e-04
Loss = 6.2831e-01, PNorm = 40.4139, GNorm = 3.1372, lr_0 = 9.6663e-04
Loss = 6.0513e-01, PNorm = 40.4362, GNorm = 1.6078, lr_0 = 9.7038e-04
Loss = 5.6348e-01, PNorm = 40.4468, GNorm = 2.8704, lr_0 = 9.7413e-04
Loss = 6.0629e-01, PNorm = 40.4588, GNorm = 1.7787, lr_0 = 9.7788e-04
Loss = 4.7196e-01, PNorm = 40.4722, GNorm = 1.7862, lr_0 = 9.8163e-04
Loss = 6.3611e-01, PNorm = 40.4923, GNorm = 3.2522, lr_0 = 9.8537e-04
Loss = 6.6404e-01, PNorm = 40.5144, GNorm = 2.8025, lr_0 = 9.8912e-04
Loss = 6.9264e-01, PNorm = 40.5259, GNorm = 2.4442, lr_0 = 9.9288e-04
Loss = 6.2462e-01, PNorm = 40.5558, GNorm = 7.3849, lr_0 = 9.9663e-04
Loss = 6.5989e-01, PNorm = 40.5882, GNorm = 3.2678, lr_0 = 9.9993e-04
Validation mae = 0.149311
Epoch 2
Loss = 5.8636e-01, PNorm = 40.6261, GNorm = 1.5094, lr_0 = 9.9925e-04
Loss = 6.2050e-01, PNorm = 40.6438, GNorm = 1.6551, lr_0 = 9.9856e-04
Loss = 5.5752e-01, PNorm = 40.6638, GNorm = 1.0631, lr_0 = 9.9788e-04
Loss = 5.6061e-01, PNorm = 40.6833, GNorm = 2.3342, lr_0 = 9.9719e-04
Loss = 5.8180e-01, PNorm = 40.7047, GNorm = 3.0159, lr_0 = 9.9651e-04
Loss = 5.0351e-01, PNorm = 40.7182, GNorm = 1.0372, lr_0 = 9.9583e-04
Loss = 5.5454e-01, PNorm = 40.7411, GNorm = 3.2618, lr_0 = 9.9515e-04
Loss = 6.1231e-01, PNorm = 40.7630, GNorm = 1.9250, lr_0 = 9.9446e-04
Loss = 6.0895e-01, PNorm = 40.7881, GNorm = 1.1269, lr_0 = 9.9378e-04
Loss = 4.8701e-01, PNorm = 40.8080, GNorm = 1.0230, lr_0 = 9.9310e-04
Loss = 4.7338e-01, PNorm = 40.8187, GNorm = 1.5880, lr_0 = 9.9242e-04
Loss = 5.0306e-01, PNorm = 40.8442, GNorm = 2.0334, lr_0 = 9.9174e-04
Loss = 5.6682e-01, PNorm = 40.8700, GNorm = 1.3744, lr_0 = 9.9106e-04
Loss = 5.7224e-01, PNorm = 40.8853, GNorm = 1.1213, lr_0 = 9.9038e-04
Loss = 6.3361e-01, PNorm = 40.9160, GNorm = 1.8318, lr_0 = 9.8971e-04
Loss = 5.6058e-01, PNorm = 40.9453, GNorm = 3.7316, lr_0 = 9.8903e-04
Loss = 5.2540e-01, PNorm = 40.9717, GNorm = 2.4055, lr_0 = 9.8835e-04
Loss = 5.9117e-01, PNorm = 41.0000, GNorm = 3.5649, lr_0 = 9.8767e-04
Loss = 5.2277e-01, PNorm = 41.0237, GNorm = 2.0101, lr_0 = 9.8700e-04
Loss = 5.6593e-01, PNorm = 41.0404, GNorm = 4.3255, lr_0 = 9.8632e-04
Loss = 6.2679e-01, PNorm = 41.0609, GNorm = 2.4443, lr_0 = 9.8564e-04
Loss = 6.0771e-01, PNorm = 41.0887, GNorm = 1.5138, lr_0 = 9.8497e-04
Loss = 6.0988e-01, PNorm = 41.1103, GNorm = 4.5166, lr_0 = 9.8429e-04
Loss = 5.7675e-01, PNorm = 41.1281, GNorm = 1.9601, lr_0 = 9.8362e-04
Loss = 5.9028e-01, PNorm = 41.1568, GNorm = 2.5908, lr_0 = 9.8295e-04
Loss = 5.4740e-01, PNorm = 41.1783, GNorm = 2.5654, lr_0 = 9.8227e-04
Loss = 5.0034e-01, PNorm = 41.2049, GNorm = 0.8342, lr_0 = 9.8160e-04
Loss = 4.6358e-01, PNorm = 41.2255, GNorm = 2.3032, lr_0 = 9.8093e-04
Loss = 4.8804e-01, PNorm = 41.2400, GNorm = 2.1413, lr_0 = 9.8026e-04
Loss = 5.3902e-01, PNorm = 41.2580, GNorm = 2.5926, lr_0 = 9.7958e-04
Loss = 5.0306e-01, PNorm = 41.2760, GNorm = 4.3708, lr_0 = 9.7891e-04
Loss = 6.0111e-01, PNorm = 41.2914, GNorm = 3.6447, lr_0 = 9.7824e-04
Loss = 5.5057e-01, PNorm = 41.3107, GNorm = 1.6387, lr_0 = 9.7757e-04
Loss = 6.1098e-01, PNorm = 41.3317, GNorm = 0.9171, lr_0 = 9.7690e-04
Loss = 6.2848e-01, PNorm = 41.3519, GNorm = 2.5438, lr_0 = 9.7623e-04
Loss = 5.1362e-01, PNorm = 41.3812, GNorm = 2.9143, lr_0 = 9.7556e-04
Loss = 4.6475e-01, PNorm = 41.4011, GNorm = 1.1105, lr_0 = 9.7490e-04
Loss = 5.2896e-01, PNorm = 41.4179, GNorm = 3.3535, lr_0 = 9.7423e-04
Loss = 5.6313e-01, PNorm = 41.4392, GNorm = 2.0421, lr_0 = 9.7356e-04
Loss = 5.3342e-01, PNorm = 41.4600, GNorm = 1.8984, lr_0 = 9.7289e-04
Loss = 5.3897e-01, PNorm = 41.4838, GNorm = 2.5039, lr_0 = 9.7223e-04
Loss = 4.9004e-01, PNorm = 41.5067, GNorm = 1.3588, lr_0 = 9.7156e-04
Loss = 5.7696e-01, PNorm = 41.5289, GNorm = 2.9865, lr_0 = 9.7090e-04
Loss = 5.4845e-01, PNorm = 41.5455, GNorm = 1.0898, lr_0 = 9.7023e-04
Loss = 5.7416e-01, PNorm = 41.5626, GNorm = 1.3942, lr_0 = 9.6957e-04
Loss = 4.9233e-01, PNorm = 41.5785, GNorm = 1.1947, lr_0 = 9.6890e-04
Loss = 5.6966e-01, PNorm = 41.6080, GNorm = 4.3400, lr_0 = 9.6824e-04
Loss = 4.4896e-01, PNorm = 41.6251, GNorm = 1.1901, lr_0 = 9.6757e-04
Loss = 6.0705e-01, PNorm = 41.6383, GNorm = 2.2197, lr_0 = 9.6691e-04
Loss = 5.9784e-01, PNorm = 41.6575, GNorm = 2.5183, lr_0 = 9.6625e-04
Loss = 5.1684e-01, PNorm = 41.6845, GNorm = 2.3461, lr_0 = 9.6559e-04
Loss = 5.6640e-01, PNorm = 41.7039, GNorm = 1.1098, lr_0 = 9.6493e-04
Loss = 5.1036e-01, PNorm = 41.7181, GNorm = 2.6719, lr_0 = 9.6427e-04
Loss = 5.1443e-01, PNorm = 41.7450, GNorm = 3.1412, lr_0 = 9.6360e-04
Loss = 5.9240e-01, PNorm = 41.7691, GNorm = 1.8706, lr_0 = 9.6294e-04
Loss = 5.2358e-01, PNorm = 41.7896, GNorm = 0.7512, lr_0 = 9.6228e-04
Loss = 5.9857e-01, PNorm = 41.8091, GNorm = 1.7594, lr_0 = 9.6163e-04
Loss = 6.1540e-01, PNorm = 41.8337, GNorm = 0.8927, lr_0 = 9.6097e-04
Loss = 5.5240e-01, PNorm = 41.8548, GNorm = 0.9425, lr_0 = 9.6031e-04
Loss = 5.7903e-01, PNorm = 41.8800, GNorm = 2.0554, lr_0 = 9.5965e-04
Loss = 5.7142e-01, PNorm = 41.9069, GNorm = 1.9328, lr_0 = 9.5899e-04
Loss = 4.9873e-01, PNorm = 41.9256, GNorm = 1.6141, lr_0 = 9.5834e-04
Loss = 5.5834e-01, PNorm = 41.9500, GNorm = 1.0707, lr_0 = 9.5768e-04
Loss = 6.0795e-01, PNorm = 41.9750, GNorm = 2.3983, lr_0 = 9.5702e-04
Loss = 5.5864e-01, PNorm = 41.9928, GNorm = 1.3552, lr_0 = 9.5637e-04
Loss = 5.5678e-01, PNorm = 42.0191, GNorm = 3.2722, lr_0 = 9.5571e-04
Loss = 5.8440e-01, PNorm = 42.0414, GNorm = 2.1379, lr_0 = 9.5506e-04
Loss = 6.3585e-01, PNorm = 42.0586, GNorm = 2.7432, lr_0 = 9.5440e-04
Loss = 4.5695e-01, PNorm = 42.0752, GNorm = 1.7726, lr_0 = 9.5375e-04
Loss = 4.7855e-01, PNorm = 42.0909, GNorm = 1.3051, lr_0 = 9.5310e-04
Loss = 5.7835e-01, PNorm = 42.1134, GNorm = 1.6844, lr_0 = 9.5244e-04
Loss = 5.1464e-01, PNorm = 42.1279, GNorm = 1.5544, lr_0 = 9.5179e-04
Loss = 5.1948e-01, PNorm = 42.1500, GNorm = 0.9635, lr_0 = 9.5114e-04
Loss = 6.4233e-01, PNorm = 42.1709, GNorm = 1.2069, lr_0 = 9.5049e-04
Loss = 5.6460e-01, PNorm = 42.1997, GNorm = 2.8699, lr_0 = 9.4984e-04
Loss = 5.4267e-01, PNorm = 42.2277, GNorm = 2.8679, lr_0 = 9.4919e-04
Loss = 5.0633e-01, PNorm = 42.2481, GNorm = 1.4968, lr_0 = 9.4854e-04
Loss = 5.3622e-01, PNorm = 42.2607, GNorm = 2.0857, lr_0 = 9.4789e-04
Loss = 5.2622e-01, PNorm = 42.2750, GNorm = 1.5264, lr_0 = 9.4724e-04
Loss = 5.2945e-01, PNorm = 42.2980, GNorm = 1.2024, lr_0 = 9.4659e-04
Loss = 5.4548e-01, PNorm = 42.3189, GNorm = 1.8712, lr_0 = 9.4594e-04
Loss = 5.6934e-01, PNorm = 42.3374, GNorm = 4.4720, lr_0 = 9.4529e-04
Loss = 5.4396e-01, PNorm = 42.3607, GNorm = 1.7090, lr_0 = 9.4464e-04
Loss = 5.5068e-01, PNorm = 42.3854, GNorm = 1.7406, lr_0 = 9.4400e-04
Loss = 5.2643e-01, PNorm = 42.4069, GNorm = 1.2098, lr_0 = 9.4335e-04
Loss = 5.3723e-01, PNorm = 42.4197, GNorm = 1.8108, lr_0 = 9.4270e-04
Loss = 5.5567e-01, PNorm = 42.4371, GNorm = 2.5272, lr_0 = 9.4206e-04
Loss = 4.9797e-01, PNorm = 42.4578, GNorm = 1.1454, lr_0 = 9.4141e-04
Loss = 5.5369e-01, PNorm = 42.4799, GNorm = 1.3130, lr_0 = 9.4077e-04
Loss = 4.6621e-01, PNorm = 42.4973, GNorm = 1.3002, lr_0 = 9.4012e-04
Loss = 4.5258e-01, PNorm = 42.5109, GNorm = 1.1837, lr_0 = 9.3948e-04
Loss = 5.3096e-01, PNorm = 42.5278, GNorm = 2.1699, lr_0 = 9.3884e-04
Loss = 5.0649e-01, PNorm = 42.5436, GNorm = 1.5103, lr_0 = 9.3819e-04
Loss = 6.5479e-01, PNorm = 42.5662, GNorm = 7.4815, lr_0 = 9.3755e-04
Loss = 5.2756e-01, PNorm = 42.5878, GNorm = 1.8490, lr_0 = 9.3691e-04
Loss = 5.2184e-01, PNorm = 42.6132, GNorm = 1.4354, lr_0 = 9.3627e-04
Loss = 5.1472e-01, PNorm = 42.6325, GNorm = 3.8772, lr_0 = 9.3562e-04
Loss = 5.1521e-01, PNorm = 42.6531, GNorm = 1.2354, lr_0 = 9.3498e-04
Loss = 5.0040e-01, PNorm = 42.6711, GNorm = 2.2616, lr_0 = 9.3434e-04
Loss = 5.1277e-01, PNorm = 42.6837, GNorm = 1.0139, lr_0 = 9.3370e-04
Loss = 5.1292e-01, PNorm = 42.7078, GNorm = 2.0541, lr_0 = 9.3306e-04
Loss = 6.2023e-01, PNorm = 42.7328, GNorm = 3.4221, lr_0 = 9.3242e-04
Loss = 6.0729e-01, PNorm = 42.7610, GNorm = 1.4486, lr_0 = 9.3178e-04
Loss = 5.8901e-01, PNorm = 42.7787, GNorm = 2.4932, lr_0 = 9.3115e-04
Loss = 5.7521e-01, PNorm = 42.7986, GNorm = 1.5372, lr_0 = 9.3051e-04
Loss = 5.4817e-01, PNorm = 42.8279, GNorm = 1.1477, lr_0 = 9.2987e-04
Loss = 5.0647e-01, PNorm = 42.8427, GNorm = 2.1560, lr_0 = 9.2923e-04
Loss = 5.4510e-01, PNorm = 42.8664, GNorm = 1.3534, lr_0 = 9.2860e-04
Loss = 4.9573e-01, PNorm = 42.8916, GNorm = 1.1963, lr_0 = 9.2796e-04
Loss = 4.9070e-01, PNorm = 42.9077, GNorm = 3.2231, lr_0 = 9.2733e-04
Loss = 4.9071e-01, PNorm = 42.9205, GNorm = 2.4304, lr_0 = 9.2669e-04
Loss = 4.9395e-01, PNorm = 42.9443, GNorm = 1.2272, lr_0 = 9.2606e-04
Loss = 5.0940e-01, PNorm = 42.9730, GNorm = 1.0117, lr_0 = 9.2542e-04
Loss = 5.9630e-01, PNorm = 42.9854, GNorm = 2.2295, lr_0 = 9.2479e-04
Loss = 4.5898e-01, PNorm = 43.0018, GNorm = 1.5418, lr_0 = 9.2415e-04
Loss = 5.1190e-01, PNorm = 43.0251, GNorm = 1.0556, lr_0 = 9.2352e-04
Loss = 6.2880e-01, PNorm = 43.0341, GNorm = 1.4002, lr_0 = 9.2289e-04
Loss = 4.7350e-01, PNorm = 43.0516, GNorm = 2.4420, lr_0 = 9.2226e-04
Loss = 4.8025e-01, PNorm = 43.0718, GNorm = 1.1363, lr_0 = 9.2162e-04
Loss = 4.9834e-01, PNorm = 43.0829, GNorm = 2.3256, lr_0 = 9.2099e-04
Validation mae = 0.126274
Epoch 3
Loss = 5.0373e-01, PNorm = 43.0979, GNorm = 2.1682, lr_0 = 9.2036e-04
Loss = 5.6515e-01, PNorm = 43.1297, GNorm = 2.1602, lr_0 = 9.1973e-04
Loss = 5.2898e-01, PNorm = 43.1573, GNorm = 1.3928, lr_0 = 9.1910e-04
Loss = 4.9983e-01, PNorm = 43.1860, GNorm = 4.5732, lr_0 = 9.1847e-04
Loss = 4.4443e-01, PNorm = 43.2063, GNorm = 1.3013, lr_0 = 9.1784e-04
Loss = 4.6788e-01, PNorm = 43.2267, GNorm = 2.0728, lr_0 = 9.1721e-04
Loss = 5.2776e-01, PNorm = 43.2341, GNorm = 1.0579, lr_0 = 9.1658e-04
Loss = 4.9870e-01, PNorm = 43.2527, GNorm = 2.0583, lr_0 = 9.1596e-04
Loss = 5.1233e-01, PNorm = 43.2752, GNorm = 1.3848, lr_0 = 9.1533e-04
Loss = 5.2459e-01, PNorm = 43.2974, GNorm = 1.0799, lr_0 = 9.1470e-04
Loss = 5.3076e-01, PNorm = 43.3233, GNorm = 1.4590, lr_0 = 9.1408e-04
Loss = 4.7476e-01, PNorm = 43.3382, GNorm = 1.1259, lr_0 = 9.1345e-04
Loss = 4.4079e-01, PNorm = 43.3559, GNorm = 1.2409, lr_0 = 9.1282e-04
Loss = 5.1468e-01, PNorm = 43.3753, GNorm = 1.8437, lr_0 = 9.1220e-04
Loss = 5.0055e-01, PNorm = 43.3838, GNorm = 2.1584, lr_0 = 9.1157e-04
Loss = 4.9561e-01, PNorm = 43.4034, GNorm = 2.3012, lr_0 = 9.1095e-04
Loss = 5.4654e-01, PNorm = 43.4168, GNorm = 1.7822, lr_0 = 9.1032e-04
Loss = 6.0193e-01, PNorm = 43.4294, GNorm = 2.3393, lr_0 = 9.0970e-04
Loss = 5.3839e-01, PNorm = 43.4460, GNorm = 1.0722, lr_0 = 9.0908e-04
Loss = 5.2781e-01, PNorm = 43.4704, GNorm = 4.8682, lr_0 = 9.0846e-04
Loss = 4.9117e-01, PNorm = 43.4924, GNorm = 1.5124, lr_0 = 9.0783e-04
Loss = 5.6901e-01, PNorm = 43.5161, GNorm = 2.9320, lr_0 = 9.0721e-04
Loss = 6.3384e-01, PNorm = 43.5447, GNorm = 4.8241, lr_0 = 9.0659e-04
Loss = 5.8661e-01, PNorm = 43.5696, GNorm = 1.1513, lr_0 = 9.0597e-04
Loss = 4.6594e-01, PNorm = 43.5918, GNorm = 1.0891, lr_0 = 9.0535e-04
Loss = 5.4429e-01, PNorm = 43.6067, GNorm = 1.2537, lr_0 = 9.0473e-04
Loss = 4.7275e-01, PNorm = 43.6293, GNorm = 1.6217, lr_0 = 9.0411e-04
Loss = 5.6008e-01, PNorm = 43.6405, GNorm = 0.9668, lr_0 = 9.0349e-04
Loss = 4.0517e-01, PNorm = 43.6581, GNorm = 0.9403, lr_0 = 9.0287e-04
Loss = 5.2563e-01, PNorm = 43.6801, GNorm = 1.4246, lr_0 = 9.0225e-04
Loss = 5.3082e-01, PNorm = 43.7006, GNorm = 2.8804, lr_0 = 9.0163e-04
Loss = 4.6570e-01, PNorm = 43.7108, GNorm = 1.6217, lr_0 = 9.0102e-04
Loss = 5.4717e-01, PNorm = 43.7271, GNorm = 0.7374, lr_0 = 9.0040e-04
Loss = 4.5662e-01, PNorm = 43.7523, GNorm = 1.7704, lr_0 = 8.9978e-04
Loss = 5.0841e-01, PNorm = 43.7660, GNorm = 1.1053, lr_0 = 8.9916e-04
Loss = 5.0301e-01, PNorm = 43.7907, GNorm = 0.9025, lr_0 = 8.9855e-04
Loss = 5.2267e-01, PNorm = 43.8082, GNorm = 1.1591, lr_0 = 8.9793e-04
Loss = 4.7686e-01, PNorm = 43.8271, GNorm = 1.1563, lr_0 = 8.9732e-04
Loss = 4.8227e-01, PNorm = 43.8452, GNorm = 1.4497, lr_0 = 8.9670e-04
Loss = 5.1515e-01, PNorm = 43.8608, GNorm = 2.0907, lr_0 = 8.9609e-04
Loss = 4.4543e-01, PNorm = 43.8777, GNorm = 2.2934, lr_0 = 8.9548e-04
Loss = 4.8524e-01, PNorm = 43.8977, GNorm = 1.4282, lr_0 = 8.9486e-04
Loss = 5.0043e-01, PNorm = 43.9143, GNorm = 1.3133, lr_0 = 8.9425e-04
Loss = 5.7363e-01, PNorm = 43.9305, GNorm = 2.9474, lr_0 = 8.9364e-04
Loss = 5.7931e-01, PNorm = 43.9482, GNorm = 1.4051, lr_0 = 8.9302e-04
Loss = 4.9260e-01, PNorm = 43.9601, GNorm = 0.8432, lr_0 = 8.9241e-04
Loss = 5.0827e-01, PNorm = 43.9823, GNorm = 2.0702, lr_0 = 8.9180e-04
Loss = 4.9498e-01, PNorm = 44.0020, GNorm = 1.6414, lr_0 = 8.9119e-04
Loss = 5.4357e-01, PNorm = 44.0276, GNorm = 1.0661, lr_0 = 8.9058e-04
Loss = 4.7149e-01, PNorm = 44.0506, GNorm = 1.0126, lr_0 = 8.8997e-04
Loss = 5.0646e-01, PNorm = 44.0695, GNorm = 1.1184, lr_0 = 8.8936e-04
Loss = 5.7021e-01, PNorm = 44.0938, GNorm = 1.8215, lr_0 = 8.8875e-04
Loss = 5.3634e-01, PNorm = 44.1142, GNorm = 1.4116, lr_0 = 8.8814e-04
Loss = 5.2803e-01, PNorm = 44.1397, GNorm = 2.4949, lr_0 = 8.8753e-04
Loss = 5.0288e-01, PNorm = 44.1657, GNorm = 2.6839, lr_0 = 8.8693e-04
Loss = 5.2532e-01, PNorm = 44.1863, GNorm = 2.3786, lr_0 = 8.8632e-04
Loss = 5.3192e-01, PNorm = 44.2124, GNorm = 2.1018, lr_0 = 8.8571e-04
Loss = 5.0060e-01, PNorm = 44.2411, GNorm = 1.7353, lr_0 = 8.8510e-04
Loss = 5.1896e-01, PNorm = 44.2714, GNorm = 1.1614, lr_0 = 8.8450e-04
Loss = 5.2224e-01, PNorm = 44.2986, GNorm = 1.7842, lr_0 = 8.8389e-04
Loss = 4.8783e-01, PNorm = 44.3329, GNorm = 1.4615, lr_0 = 8.8329e-04
Loss = 5.1735e-01, PNorm = 44.3593, GNorm = 1.6071, lr_0 = 8.8268e-04
Loss = 5.6923e-01, PNorm = 44.3778, GNorm = 0.9176, lr_0 = 8.8208e-04
Loss = 4.9543e-01, PNorm = 44.3971, GNorm = 2.2931, lr_0 = 8.8147e-04
Loss = 4.9895e-01, PNorm = 44.4170, GNorm = 4.0861, lr_0 = 8.8087e-04
Loss = 4.5945e-01, PNorm = 44.4316, GNorm = 1.4416, lr_0 = 8.8026e-04
Loss = 5.5519e-01, PNorm = 44.4584, GNorm = 1.0985, lr_0 = 8.7966e-04
Loss = 4.4051e-01, PNorm = 44.4809, GNorm = 1.1933, lr_0 = 8.7906e-04
Loss = 4.8486e-01, PNorm = 44.5047, GNorm = 1.6535, lr_0 = 8.7846e-04
Loss = 5.4172e-01, PNorm = 44.5321, GNorm = 1.5955, lr_0 = 8.7785e-04
Loss = 5.0937e-01, PNorm = 44.5474, GNorm = 2.0835, lr_0 = 8.7725e-04
Loss = 4.6576e-01, PNorm = 44.5680, GNorm = 0.9765, lr_0 = 8.7665e-04
Loss = 5.2904e-01, PNorm = 44.5913, GNorm = 2.1336, lr_0 = 8.7605e-04
Loss = 5.1023e-01, PNorm = 44.6080, GNorm = 3.8918, lr_0 = 8.7545e-04
Loss = 5.4528e-01, PNorm = 44.6228, GNorm = 1.1585, lr_0 = 8.7485e-04
Loss = 5.5179e-01, PNorm = 44.6345, GNorm = 2.8095, lr_0 = 8.7425e-04
Loss = 5.3565e-01, PNorm = 44.6523, GNorm = 1.4219, lr_0 = 8.7365e-04
Loss = 5.0474e-01, PNorm = 44.6723, GNorm = 1.0919, lr_0 = 8.7306e-04
Loss = 5.0976e-01, PNorm = 44.6929, GNorm = 1.1591, lr_0 = 8.7246e-04
Loss = 4.9101e-01, PNorm = 44.7184, GNorm = 2.2764, lr_0 = 8.7186e-04
Loss = 5.3648e-01, PNorm = 44.7402, GNorm = 1.0483, lr_0 = 8.7126e-04
Loss = 4.7705e-01, PNorm = 44.7606, GNorm = 1.9082, lr_0 = 8.7067e-04
Loss = 4.9968e-01, PNorm = 44.7712, GNorm = 1.6877, lr_0 = 8.7007e-04
Loss = 5.0465e-01, PNorm = 44.7883, GNorm = 1.4972, lr_0 = 8.6947e-04
Loss = 4.8150e-01, PNorm = 44.8047, GNorm = 1.6482, lr_0 = 8.6888e-04
Loss = 4.4722e-01, PNorm = 44.8216, GNorm = 0.9879, lr_0 = 8.6828e-04
Loss = 4.2464e-01, PNorm = 44.8482, GNorm = 1.0232, lr_0 = 8.6769e-04
Loss = 4.3608e-01, PNorm = 44.8676, GNorm = 1.8887, lr_0 = 8.6709e-04
Loss = 4.8419e-01, PNorm = 44.8832, GNorm = 1.1315, lr_0 = 8.6650e-04
Loss = 4.6532e-01, PNorm = 44.9016, GNorm = 2.8017, lr_0 = 8.6590e-04
Loss = 4.4711e-01, PNorm = 44.9201, GNorm = 1.0172, lr_0 = 8.6531e-04
Loss = 5.2032e-01, PNorm = 44.9387, GNorm = 1.0862, lr_0 = 8.6472e-04
Loss = 5.0862e-01, PNorm = 44.9532, GNorm = 2.3632, lr_0 = 8.6413e-04
Loss = 5.0124e-01, PNorm = 44.9700, GNorm = 1.9879, lr_0 = 8.6353e-04
Loss = 5.0765e-01, PNorm = 44.9887, GNorm = 1.3820, lr_0 = 8.6294e-04
Loss = 5.2347e-01, PNorm = 45.0073, GNorm = 1.9200, lr_0 = 8.6235e-04
Loss = 5.0539e-01, PNorm = 45.0195, GNorm = 1.2434, lr_0 = 8.6176e-04
Loss = 5.2315e-01, PNorm = 45.0467, GNorm = 1.6383, lr_0 = 8.6117e-04
Loss = 5.2942e-01, PNorm = 45.0630, GNorm = 1.1244, lr_0 = 8.6058e-04
Loss = 5.2162e-01, PNorm = 45.0763, GNorm = 2.3458, lr_0 = 8.5999e-04
Loss = 5.2997e-01, PNorm = 45.0915, GNorm = 1.2491, lr_0 = 8.5940e-04
Loss = 5.3855e-01, PNorm = 45.1272, GNorm = 1.8086, lr_0 = 8.5881e-04
Loss = 4.6201e-01, PNorm = 45.1528, GNorm = 1.0837, lr_0 = 8.5823e-04
Loss = 5.6978e-01, PNorm = 45.1775, GNorm = 1.4740, lr_0 = 8.5764e-04
Loss = 6.4304e-01, PNorm = 45.1975, GNorm = 1.4026, lr_0 = 8.5705e-04
Loss = 5.5691e-01, PNorm = 45.2188, GNorm = 1.1232, lr_0 = 8.5646e-04
Loss = 5.9351e-01, PNorm = 45.2342, GNorm = 1.7451, lr_0 = 8.5588e-04
Loss = 4.5276e-01, PNorm = 45.2567, GNorm = 2.0746, lr_0 = 8.5529e-04
Loss = 4.6055e-01, PNorm = 45.2773, GNorm = 1.1662, lr_0 = 8.5470e-04
Loss = 4.1603e-01, PNorm = 45.2898, GNorm = 1.3069, lr_0 = 8.5412e-04
Loss = 4.9718e-01, PNorm = 45.3051, GNorm = 1.6876, lr_0 = 8.5353e-04
Loss = 6.3970e-01, PNorm = 45.3222, GNorm = 1.4044, lr_0 = 8.5295e-04
Loss = 5.5026e-01, PNorm = 45.3464, GNorm = 3.9169, lr_0 = 8.5236e-04
Loss = 4.6822e-01, PNorm = 45.3633, GNorm = 1.1255, lr_0 = 8.5178e-04
Loss = 4.7270e-01, PNorm = 45.3781, GNorm = 0.9405, lr_0 = 8.5120e-04
Loss = 5.4263e-01, PNorm = 45.3973, GNorm = 1.2507, lr_0 = 8.5061e-04
Loss = 5.6801e-01, PNorm = 45.4176, GNorm = 3.0694, lr_0 = 8.5003e-04
Loss = 4.5589e-01, PNorm = 45.4375, GNorm = 1.5988, lr_0 = 8.4945e-04
Loss = 5.5837e-01, PNorm = 45.4627, GNorm = 1.0710, lr_0 = 8.4887e-04
Loss = 4.4125e-01, PNorm = 45.4897, GNorm = 1.0985, lr_0 = 8.4828e-04
Validation mae = 0.123338
Epoch 4
Loss = 5.2607e-01, PNorm = 45.5058, GNorm = 1.8577, lr_0 = 8.4770e-04
Loss = 5.1576e-01, PNorm = 45.5201, GNorm = 1.1754, lr_0 = 8.4712e-04
Loss = 4.9106e-01, PNorm = 45.5362, GNorm = 0.9722, lr_0 = 8.4654e-04
Loss = 4.3724e-01, PNorm = 45.5562, GNorm = 1.5287, lr_0 = 8.4596e-04
Loss = 4.6977e-01, PNorm = 45.5737, GNorm = 2.1097, lr_0 = 8.4538e-04
Loss = 4.7480e-01, PNorm = 45.5920, GNorm = 1.5398, lr_0 = 8.4480e-04
Loss = 5.0768e-01, PNorm = 45.6078, GNorm = 3.7893, lr_0 = 8.4423e-04
Loss = 5.8276e-01, PNorm = 45.6270, GNorm = 1.2042, lr_0 = 8.4365e-04
Loss = 5.3679e-01, PNorm = 45.6626, GNorm = 2.2321, lr_0 = 8.4307e-04
Loss = 5.7213e-01, PNorm = 45.6761, GNorm = 1.5373, lr_0 = 8.4249e-04
Loss = 5.2668e-01, PNorm = 45.7064, GNorm = 1.8173, lr_0 = 8.4191e-04
Loss = 4.9821e-01, PNorm = 45.7300, GNorm = 1.6552, lr_0 = 8.4134e-04
Loss = 3.9420e-01, PNorm = 45.7461, GNorm = 1.1225, lr_0 = 8.4076e-04
Loss = 4.6313e-01, PNorm = 45.7597, GNorm = 1.7141, lr_0 = 8.4019e-04
Loss = 4.9157e-01, PNorm = 45.7848, GNorm = 1.5539, lr_0 = 8.3961e-04
Loss = 4.9292e-01, PNorm = 45.8078, GNorm = 1.1567, lr_0 = 8.3903e-04
Loss = 5.1203e-01, PNorm = 45.8311, GNorm = 1.8659, lr_0 = 8.3846e-04
Loss = 4.6478e-01, PNorm = 45.8590, GNorm = 1.4868, lr_0 = 8.3789e-04
Loss = 4.9134e-01, PNorm = 45.8878, GNorm = 1.5104, lr_0 = 8.3731e-04
Loss = 4.4540e-01, PNorm = 45.9135, GNorm = 1.4357, lr_0 = 8.3674e-04
Loss = 4.5778e-01, PNorm = 45.9324, GNorm = 1.1678, lr_0 = 8.3616e-04
Loss = 4.7959e-01, PNorm = 45.9465, GNorm = 1.1670, lr_0 = 8.3559e-04
Loss = 5.1878e-01, PNorm = 45.9622, GNorm = 2.0678, lr_0 = 8.3502e-04
Loss = 4.6809e-01, PNorm = 45.9754, GNorm = 1.1923, lr_0 = 8.3445e-04
Loss = 4.2941e-01, PNorm = 45.9925, GNorm = 1.5636, lr_0 = 8.3388e-04
Loss = 5.5136e-01, PNorm = 46.0006, GNorm = 2.5849, lr_0 = 8.3330e-04
Loss = 4.6037e-01, PNorm = 46.0228, GNorm = 0.9353, lr_0 = 8.3273e-04
Loss = 5.3961e-01, PNorm = 46.0435, GNorm = 1.1726, lr_0 = 8.3216e-04
Loss = 4.2684e-01, PNorm = 46.0574, GNorm = 1.0555, lr_0 = 8.3159e-04
Loss = 5.1996e-01, PNorm = 46.0747, GNorm = 1.3306, lr_0 = 8.3102e-04
Loss = 4.9802e-01, PNorm = 46.0904, GNorm = 1.3075, lr_0 = 8.3045e-04
Loss = 5.0300e-01, PNorm = 46.1074, GNorm = 1.2866, lr_0 = 8.2988e-04
Loss = 4.3077e-01, PNorm = 46.1311, GNorm = 1.2746, lr_0 = 8.2932e-04
Loss = 5.0171e-01, PNorm = 46.1481, GNorm = 1.7654, lr_0 = 8.2875e-04
Loss = 5.5549e-01, PNorm = 46.1655, GNorm = 1.6869, lr_0 = 8.2818e-04
Loss = 4.9753e-01, PNorm = 46.1858, GNorm = 0.9989, lr_0 = 8.2761e-04
Loss = 4.8656e-01, PNorm = 46.2028, GNorm = 1.5379, lr_0 = 8.2705e-04
Loss = 4.5238e-01, PNorm = 46.2314, GNorm = 1.5777, lr_0 = 8.2648e-04
Loss = 5.1345e-01, PNorm = 46.2473, GNorm = 2.1165, lr_0 = 8.2591e-04
Loss = 4.9287e-01, PNorm = 46.2644, GNorm = 1.1390, lr_0 = 8.2535e-04
Loss = 4.7675e-01, PNorm = 46.2791, GNorm = 1.3546, lr_0 = 8.2478e-04
Loss = 5.7522e-01, PNorm = 46.2992, GNorm = 2.5261, lr_0 = 8.2422e-04
Loss = 4.8191e-01, PNorm = 46.3158, GNorm = 1.4175, lr_0 = 8.2365e-04
Loss = 4.3435e-01, PNorm = 46.3401, GNorm = 0.9499, lr_0 = 8.2309e-04
Loss = 5.0535e-01, PNorm = 46.3617, GNorm = 1.8535, lr_0 = 8.2252e-04
Loss = 4.8982e-01, PNorm = 46.3865, GNorm = 2.7286, lr_0 = 8.2196e-04
Loss = 5.0277e-01, PNorm = 46.4069, GNorm = 1.4123, lr_0 = 8.2140e-04
Loss = 4.5544e-01, PNorm = 46.4319, GNorm = 1.0928, lr_0 = 8.2084e-04
Loss = 5.6900e-01, PNorm = 46.4561, GNorm = 1.1717, lr_0 = 8.2027e-04
Loss = 4.8244e-01, PNorm = 46.4658, GNorm = 2.8240, lr_0 = 8.1971e-04
Loss = 4.8317e-01, PNorm = 46.4756, GNorm = 1.1574, lr_0 = 8.1915e-04
Loss = 4.5443e-01, PNorm = 46.4941, GNorm = 1.2129, lr_0 = 8.1859e-04
Loss = 4.9948e-01, PNorm = 46.5120, GNorm = 2.3172, lr_0 = 8.1803e-04
Loss = 4.6682e-01, PNorm = 46.5288, GNorm = 1.6983, lr_0 = 8.1747e-04
Loss = 5.1351e-01, PNorm = 46.5495, GNorm = 1.3600, lr_0 = 8.1691e-04
Loss = 4.3987e-01, PNorm = 46.5699, GNorm = 1.1957, lr_0 = 8.1635e-04
Loss = 5.1301e-01, PNorm = 46.5867, GNorm = 0.9323, lr_0 = 8.1579e-04
Loss = 4.1375e-01, PNorm = 46.6041, GNorm = 1.6655, lr_0 = 8.1523e-04
Loss = 5.0026e-01, PNorm = 46.6139, GNorm = 1.7370, lr_0 = 8.1467e-04
Loss = 5.2154e-01, PNorm = 46.6257, GNorm = 1.6700, lr_0 = 8.1411e-04
Loss = 4.8273e-01, PNorm = 46.6417, GNorm = 0.9345, lr_0 = 8.1355e-04
Loss = 5.1750e-01, PNorm = 46.6568, GNorm = 1.9138, lr_0 = 8.1300e-04
Loss = 6.2838e-01, PNorm = 46.6788, GNorm = 1.3468, lr_0 = 8.1244e-04
Loss = 4.6920e-01, PNorm = 46.6983, GNorm = 1.0954, lr_0 = 8.1188e-04
Loss = 5.2655e-01, PNorm = 46.7105, GNorm = 1.1124, lr_0 = 8.1133e-04
Loss = 5.0311e-01, PNorm = 46.7242, GNorm = 2.7634, lr_0 = 8.1077e-04
Loss = 4.7967e-01, PNorm = 46.7410, GNorm = 0.8166, lr_0 = 8.1022e-04
Loss = 4.9044e-01, PNorm = 46.7630, GNorm = 2.6902, lr_0 = 8.0966e-04
Loss = 5.2629e-01, PNorm = 46.7829, GNorm = 1.9059, lr_0 = 8.0911e-04
Loss = 5.1208e-01, PNorm = 46.8020, GNorm = 1.6931, lr_0 = 8.0855e-04
Loss = 4.3650e-01, PNorm = 46.8258, GNorm = 0.9612, lr_0 = 8.0800e-04
Loss = 4.1728e-01, PNorm = 46.8481, GNorm = 2.3389, lr_0 = 8.0745e-04
Loss = 5.7822e-01, PNorm = 46.8638, GNorm = 1.2411, lr_0 = 8.0689e-04
Loss = 5.0629e-01, PNorm = 46.8767, GNorm = 1.2517, lr_0 = 8.0634e-04
Loss = 5.1474e-01, PNorm = 46.9009, GNorm = 1.1523, lr_0 = 8.0579e-04
Loss = 4.7508e-01, PNorm = 46.9190, GNorm = 1.8159, lr_0 = 8.0523e-04
Loss = 4.5371e-01, PNorm = 46.9353, GNorm = 1.6989, lr_0 = 8.0468e-04
Loss = 4.5060e-01, PNorm = 46.9556, GNorm = 1.1607, lr_0 = 8.0413e-04
Loss = 4.2277e-01, PNorm = 46.9722, GNorm = 1.2866, lr_0 = 8.0358e-04
Loss = 4.3718e-01, PNorm = 46.9894, GNorm = 2.5126, lr_0 = 8.0303e-04
Loss = 5.0112e-01, PNorm = 47.0169, GNorm = 1.9898, lr_0 = 8.0248e-04
Loss = 4.4589e-01, PNorm = 47.0268, GNorm = 1.1158, lr_0 = 8.0193e-04
Loss = 4.2875e-01, PNorm = 47.0362, GNorm = 1.1441, lr_0 = 8.0138e-04
Loss = 4.3424e-01, PNorm = 47.0514, GNorm = 1.5913, lr_0 = 8.0083e-04
Loss = 4.6980e-01, PNorm = 47.0646, GNorm = 1.5172, lr_0 = 8.0028e-04
Loss = 4.3190e-01, PNorm = 47.0789, GNorm = 1.0244, lr_0 = 7.9974e-04
Loss = 4.2731e-01, PNorm = 47.0956, GNorm = 2.4659, lr_0 = 7.9919e-04
Loss = 5.2033e-01, PNorm = 47.1198, GNorm = 1.2430, lr_0 = 7.9864e-04
Loss = 4.8449e-01, PNorm = 47.1364, GNorm = 1.4185, lr_0 = 7.9809e-04
Loss = 4.9747e-01, PNorm = 47.1634, GNorm = 1.0318, lr_0 = 7.9755e-04
Loss = 4.7623e-01, PNorm = 47.1888, GNorm = 1.6713, lr_0 = 7.9700e-04
Loss = 4.7755e-01, PNorm = 47.2144, GNorm = 1.4003, lr_0 = 7.9645e-04
Loss = 4.9900e-01, PNorm = 47.2356, GNorm = 1.5405, lr_0 = 7.9591e-04
Loss = 5.3670e-01, PNorm = 47.2518, GNorm = 1.0676, lr_0 = 7.9536e-04
Loss = 4.8502e-01, PNorm = 47.2682, GNorm = 1.4907, lr_0 = 7.9482e-04
Loss = 5.5212e-01, PNorm = 47.2890, GNorm = 1.1807, lr_0 = 7.9427e-04
Loss = 4.9866e-01, PNorm = 47.3102, GNorm = 2.5876, lr_0 = 7.9373e-04
Loss = 5.1309e-01, PNorm = 47.3292, GNorm = 1.4294, lr_0 = 7.9319e-04
Loss = 4.8151e-01, PNorm = 47.3555, GNorm = 1.1867, lr_0 = 7.9264e-04
Loss = 4.4922e-01, PNorm = 47.3732, GNorm = 1.1313, lr_0 = 7.9210e-04
Loss = 5.0321e-01, PNorm = 47.3955, GNorm = 1.3630, lr_0 = 7.9156e-04
Loss = 4.4691e-01, PNorm = 47.4096, GNorm = 1.1855, lr_0 = 7.9101e-04
Loss = 4.3448e-01, PNorm = 47.4270, GNorm = 1.4555, lr_0 = 7.9047e-04
Loss = 4.6214e-01, PNorm = 47.4472, GNorm = 1.5082, lr_0 = 7.8993e-04
Loss = 4.8139e-01, PNorm = 47.4614, GNorm = 1.1546, lr_0 = 7.8939e-04
Loss = 4.9646e-01, PNorm = 47.4787, GNorm = 1.1905, lr_0 = 7.8885e-04
Loss = 5.0169e-01, PNorm = 47.4985, GNorm = 1.1787, lr_0 = 7.8831e-04
Loss = 5.0254e-01, PNorm = 47.5230, GNorm = 1.3676, lr_0 = 7.8777e-04
Loss = 4.7325e-01, PNorm = 47.5474, GNorm = 0.9187, lr_0 = 7.8723e-04
Loss = 4.0406e-01, PNorm = 47.5635, GNorm = 1.2843, lr_0 = 7.8669e-04
Loss = 5.1321e-01, PNorm = 47.5729, GNorm = 2.8847, lr_0 = 7.8615e-04
Loss = 5.0471e-01, PNorm = 47.5933, GNorm = 1.1829, lr_0 = 7.8561e-04
Loss = 3.9476e-01, PNorm = 47.6180, GNorm = 2.2276, lr_0 = 7.8507e-04
Loss = 4.5554e-01, PNorm = 47.6310, GNorm = 1.7067, lr_0 = 7.8454e-04
Loss = 4.4159e-01, PNorm = 47.6473, GNorm = 1.2526, lr_0 = 7.8400e-04
Loss = 4.8553e-01, PNorm = 47.6645, GNorm = 1.7144, lr_0 = 7.8346e-04
Loss = 5.1907e-01, PNorm = 47.6834, GNorm = 1.1593, lr_0 = 7.8293e-04
Loss = 4.7736e-01, PNorm = 47.7013, GNorm = 0.9560, lr_0 = 7.8239e-04
Loss = 4.5063e-01, PNorm = 47.7181, GNorm = 0.8105, lr_0 = 7.8185e-04
Loss = 4.7431e-01, PNorm = 47.7283, GNorm = 3.0802, lr_0 = 7.8132e-04
Validation mae = 0.121633
Epoch 5
Loss = 4.1927e-01, PNorm = 47.7233, GNorm = 1.7035, lr_0 = 7.8078e-04
Loss = 4.5748e-01, PNorm = 47.7351, GNorm = 1.3389, lr_0 = 7.8025e-04
Loss = 5.2009e-01, PNorm = 47.7548, GNorm = 2.0734, lr_0 = 7.7971e-04
Loss = 5.2419e-01, PNorm = 47.7696, GNorm = 2.0263, lr_0 = 7.7918e-04
Loss = 4.2802e-01, PNorm = 47.7955, GNorm = 1.1956, lr_0 = 7.7864e-04
Loss = 4.1268e-01, PNorm = 47.8157, GNorm = 1.1507, lr_0 = 7.7811e-04
Loss = 4.7285e-01, PNorm = 47.8298, GNorm = 1.3365, lr_0 = 7.7758e-04
Loss = 4.6073e-01, PNorm = 47.8388, GNorm = 0.8504, lr_0 = 7.7705e-04
Loss = 4.9391e-01, PNorm = 47.8578, GNorm = 2.9660, lr_0 = 7.7651e-04
Loss = 4.6135e-01, PNorm = 47.8812, GNorm = 1.2081, lr_0 = 7.7598e-04
Loss = 4.2752e-01, PNorm = 47.9029, GNorm = 1.1052, lr_0 = 7.7545e-04
Loss = 5.1813e-01, PNorm = 47.9243, GNorm = 1.0618, lr_0 = 7.7492e-04
Loss = 4.2058e-01, PNorm = 47.9436, GNorm = 1.3901, lr_0 = 7.7439e-04
Loss = 5.0281e-01, PNorm = 47.9529, GNorm = 1.3830, lr_0 = 7.7386e-04
Loss = 4.8567e-01, PNorm = 47.9709, GNorm = 3.1837, lr_0 = 7.7333e-04
Loss = 5.0047e-01, PNorm = 47.9879, GNorm = 3.6015, lr_0 = 7.7280e-04
Loss = 4.6162e-01, PNorm = 48.0018, GNorm = 1.8131, lr_0 = 7.7227e-04
Loss = 4.4986e-01, PNorm = 48.0234, GNorm = 0.9369, lr_0 = 7.7174e-04
Loss = 4.8626e-01, PNorm = 48.0454, GNorm = 1.0806, lr_0 = 7.7121e-04
Loss = 4.1687e-01, PNorm = 48.0660, GNorm = 0.9885, lr_0 = 7.7068e-04
Loss = 5.3690e-01, PNorm = 48.0828, GNorm = 1.6157, lr_0 = 7.7015e-04
Loss = 5.2994e-01, PNorm = 48.1010, GNorm = 1.4101, lr_0 = 7.6963e-04
Loss = 4.6769e-01, PNorm = 48.1193, GNorm = 1.2516, lr_0 = 7.6910e-04
Loss = 4.9664e-01, PNorm = 48.1451, GNorm = 0.9940, lr_0 = 7.6857e-04
Loss = 4.9230e-01, PNorm = 48.1654, GNorm = 1.6612, lr_0 = 7.6805e-04
Loss = 4.7463e-01, PNorm = 48.1842, GNorm = 1.3069, lr_0 = 7.6752e-04
Loss = 4.8325e-01, PNorm = 48.2076, GNorm = 1.8824, lr_0 = 7.6699e-04
Loss = 5.1461e-01, PNorm = 48.2282, GNorm = 1.5772, lr_0 = 7.6647e-04
Loss = 4.6920e-01, PNorm = 48.2405, GNorm = 1.3441, lr_0 = 7.6594e-04
Loss = 4.5123e-01, PNorm = 48.2573, GNorm = 1.3764, lr_0 = 7.6542e-04
Loss = 5.3218e-01, PNorm = 48.2750, GNorm = 1.6264, lr_0 = 7.6489e-04
Loss = 4.4336e-01, PNorm = 48.2912, GNorm = 0.9246, lr_0 = 7.6437e-04
Loss = 4.5566e-01, PNorm = 48.3040, GNorm = 1.4112, lr_0 = 7.6385e-04
Loss = 4.2553e-01, PNorm = 48.3211, GNorm = 1.9722, lr_0 = 7.6332e-04
Loss = 4.7102e-01, PNorm = 48.3371, GNorm = 1.1619, lr_0 = 7.6280e-04
Loss = 4.4876e-01, PNorm = 48.3646, GNorm = 2.0404, lr_0 = 7.6228e-04
Loss = 5.4506e-01, PNorm = 48.3802, GNorm = 0.8273, lr_0 = 7.6176e-04
Loss = 4.9134e-01, PNorm = 48.3994, GNorm = 1.3270, lr_0 = 7.6123e-04
Loss = 4.5210e-01, PNorm = 48.4112, GNorm = 1.1273, lr_0 = 7.6071e-04
Loss = 4.4221e-01, PNorm = 48.4240, GNorm = 1.2824, lr_0 = 7.6019e-04
Loss = 4.6609e-01, PNorm = 48.4468, GNorm = 2.0131, lr_0 = 7.5967e-04
Loss = 4.5097e-01, PNorm = 48.4645, GNorm = 1.8610, lr_0 = 7.5915e-04
Loss = 4.4911e-01, PNorm = 48.4835, GNorm = 0.9551, lr_0 = 7.5863e-04
Loss = 4.4512e-01, PNorm = 48.5022, GNorm = 1.5532, lr_0 = 7.5811e-04
Loss = 4.6514e-01, PNorm = 48.5239, GNorm = 1.8157, lr_0 = 7.5759e-04
Loss = 4.7327e-01, PNorm = 48.5413, GNorm = 1.4073, lr_0 = 7.5707e-04
Loss = 5.0353e-01, PNorm = 48.5609, GNorm = 1.2334, lr_0 = 7.5655e-04
Loss = 4.2385e-01, PNorm = 48.5748, GNorm = 1.6145, lr_0 = 7.5603e-04
Loss = 4.8317e-01, PNorm = 48.5895, GNorm = 1.2298, lr_0 = 7.5552e-04
Loss = 4.7349e-01, PNorm = 48.6044, GNorm = 1.8888, lr_0 = 7.5500e-04
Loss = 4.4151e-01, PNorm = 48.6217, GNorm = 1.8004, lr_0 = 7.5448e-04
Loss = 4.2544e-01, PNorm = 48.6406, GNorm = 1.4641, lr_0 = 7.5397e-04
Loss = 4.9278e-01, PNorm = 48.6606, GNorm = 2.8103, lr_0 = 7.5345e-04
Loss = 5.7210e-01, PNorm = 48.6830, GNorm = 1.9882, lr_0 = 7.5293e-04
Loss = 4.5546e-01, PNorm = 48.7101, GNorm = 1.6036, lr_0 = 7.5242e-04
Loss = 4.6583e-01, PNorm = 48.7393, GNorm = 0.9833, lr_0 = 7.5190e-04
Loss = 5.1233e-01, PNorm = 48.7562, GNorm = 1.8324, lr_0 = 7.5139e-04
Loss = 5.0351e-01, PNorm = 48.7765, GNorm = 1.2239, lr_0 = 7.5087e-04
Loss = 4.3189e-01, PNorm = 48.7895, GNorm = 1.2750, lr_0 = 7.5036e-04
Loss = 5.3146e-01, PNorm = 48.8145, GNorm = 1.0321, lr_0 = 7.4984e-04
Loss = 4.3901e-01, PNorm = 48.8362, GNorm = 1.8053, lr_0 = 7.4933e-04
Loss = 4.7878e-01, PNorm = 48.8501, GNorm = 1.2967, lr_0 = 7.4882e-04
Loss = 4.9037e-01, PNorm = 48.8710, GNorm = 1.6214, lr_0 = 7.4830e-04
Loss = 5.0374e-01, PNorm = 48.8903, GNorm = 2.0921, lr_0 = 7.4779e-04
Loss = 4.5256e-01, PNorm = 48.9161, GNorm = 1.5643, lr_0 = 7.4728e-04
Loss = 4.6005e-01, PNorm = 48.9272, GNorm = 1.0447, lr_0 = 7.4677e-04
Loss = 4.2460e-01, PNorm = 48.9373, GNorm = 1.2361, lr_0 = 7.4625e-04
Loss = 4.5228e-01, PNorm = 48.9524, GNorm = 1.8245, lr_0 = 7.4574e-04
Loss = 5.0931e-01, PNorm = 48.9654, GNorm = 1.6485, lr_0 = 7.4523e-04
Loss = 4.2533e-01, PNorm = 48.9813, GNorm = 1.1799, lr_0 = 7.4472e-04
Loss = 5.0913e-01, PNorm = 48.9952, GNorm = 1.3155, lr_0 = 7.4421e-04
Loss = 4.4272e-01, PNorm = 49.0152, GNorm = 2.4864, lr_0 = 7.4370e-04
Loss = 4.4092e-01, PNorm = 49.0345, GNorm = 1.5304, lr_0 = 7.4319e-04
Loss = 4.4174e-01, PNorm = 49.0497, GNorm = 2.5341, lr_0 = 7.4268e-04
Loss = 4.8294e-01, PNorm = 49.0645, GNorm = 2.6509, lr_0 = 7.4217e-04
Loss = 4.7867e-01, PNorm = 49.0785, GNorm = 1.2315, lr_0 = 7.4167e-04
Loss = 4.9968e-01, PNorm = 49.0951, GNorm = 1.4838, lr_0 = 7.4116e-04
Loss = 4.5687e-01, PNorm = 49.1098, GNorm = 1.0040, lr_0 = 7.4065e-04
Loss = 5.1471e-01, PNorm = 49.1212, GNorm = 1.2806, lr_0 = 7.4014e-04
Loss = 4.8122e-01, PNorm = 49.1374, GNorm = 0.9668, lr_0 = 7.3964e-04
Loss = 4.8669e-01, PNorm = 49.1555, GNorm = 1.7509, lr_0 = 7.3913e-04
Loss = 4.7993e-01, PNorm = 49.1690, GNorm = 1.1013, lr_0 = 7.3862e-04
Loss = 4.5734e-01, PNorm = 49.1882, GNorm = 0.7901, lr_0 = 7.3812e-04
Loss = 4.4993e-01, PNorm = 49.2040, GNorm = 1.1123, lr_0 = 7.3761e-04
Loss = 4.8385e-01, PNorm = 49.2155, GNorm = 1.1096, lr_0 = 7.3711e-04
Loss = 4.3360e-01, PNorm = 49.2334, GNorm = 0.9761, lr_0 = 7.3660e-04
Loss = 4.0337e-01, PNorm = 49.2440, GNorm = 1.4742, lr_0 = 7.3610e-04
Loss = 4.8046e-01, PNorm = 49.2576, GNorm = 2.4705, lr_0 = 7.3559e-04
Loss = 4.5211e-01, PNorm = 49.2703, GNorm = 1.3114, lr_0 = 7.3509e-04
Loss = 4.1774e-01, PNorm = 49.2864, GNorm = 1.2529, lr_0 = 7.3458e-04
Loss = 4.2098e-01, PNorm = 49.3052, GNorm = 2.1687, lr_0 = 7.3408e-04
Loss = 4.7627e-01, PNorm = 49.3149, GNorm = 1.1616, lr_0 = 7.3358e-04
Loss = 3.9363e-01, PNorm = 49.3332, GNorm = 1.8638, lr_0 = 7.3308e-04
Loss = 4.0472e-01, PNorm = 49.3487, GNorm = 1.0971, lr_0 = 7.3257e-04
Loss = 5.1073e-01, PNorm = 49.3626, GNorm = 2.0017, lr_0 = 7.3207e-04
Loss = 4.4531e-01, PNorm = 49.3781, GNorm = 1.6231, lr_0 = 7.3157e-04
Loss = 4.9139e-01, PNorm = 49.3843, GNorm = 1.2610, lr_0 = 7.3107e-04
Loss = 5.0009e-01, PNorm = 49.3962, GNorm = 1.2657, lr_0 = 7.3057e-04
Loss = 4.4541e-01, PNorm = 49.4154, GNorm = 1.6205, lr_0 = 7.3007e-04
Loss = 4.4781e-01, PNorm = 49.4273, GNorm = 1.3447, lr_0 = 7.2957e-04
Loss = 4.7983e-01, PNorm = 49.4426, GNorm = 2.3709, lr_0 = 7.2907e-04
Loss = 4.3536e-01, PNorm = 49.4669, GNorm = 1.1531, lr_0 = 7.2857e-04
Loss = 4.9475e-01, PNorm = 49.4906, GNorm = 1.6711, lr_0 = 7.2807e-04
Loss = 4.7700e-01, PNorm = 49.5064, GNorm = 2.6620, lr_0 = 7.2757e-04
Loss = 4.9502e-01, PNorm = 49.5256, GNorm = 1.4418, lr_0 = 7.2707e-04
Loss = 4.8514e-01, PNorm = 49.5443, GNorm = 2.7595, lr_0 = 7.2657e-04
Loss = 4.2183e-01, PNorm = 49.5655, GNorm = 1.2407, lr_0 = 7.2608e-04
Loss = 4.3949e-01, PNorm = 49.5791, GNorm = 2.0486, lr_0 = 7.2558e-04
Loss = 4.3303e-01, PNorm = 49.5972, GNorm = 1.4855, lr_0 = 7.2508e-04
Loss = 4.7624e-01, PNorm = 49.6144, GNorm = 2.0404, lr_0 = 7.2458e-04
Loss = 5.3694e-01, PNorm = 49.6308, GNorm = 2.0765, lr_0 = 7.2409e-04
Loss = 4.9789e-01, PNorm = 49.6476, GNorm = 1.2659, lr_0 = 7.2359e-04
Loss = 4.4874e-01, PNorm = 49.6684, GNorm = 1.5464, lr_0 = 7.2310e-04
Loss = 4.0162e-01, PNorm = 49.6791, GNorm = 1.3489, lr_0 = 7.2260e-04
Loss = 4.1910e-01, PNorm = 49.6857, GNorm = 1.0345, lr_0 = 7.2211e-04
Loss = 4.5334e-01, PNorm = 49.7019, GNorm = 1.6985, lr_0 = 7.2161e-04
Loss = 4.8126e-01, PNorm = 49.7227, GNorm = 1.1386, lr_0 = 7.2112e-04
Loss = 4.3963e-01, PNorm = 49.7321, GNorm = 1.3177, lr_0 = 7.2062e-04
Loss = 4.0850e-01, PNorm = 49.7375, GNorm = 0.9210, lr_0 = 7.2013e-04
Loss = 4.9869e-01, PNorm = 49.7516, GNorm = 1.3572, lr_0 = 7.1964e-04
Validation mae = 0.121348
Epoch 6
Loss = 4.3583e-01, PNorm = 49.7651, GNorm = 1.5953, lr_0 = 7.1914e-04
Loss = 4.7064e-01, PNorm = 49.7844, GNorm = 1.2978, lr_0 = 7.1865e-04
Loss = 4.7733e-01, PNorm = 49.8070, GNorm = 2.7018, lr_0 = 7.1816e-04
Loss = 4.3553e-01, PNorm = 49.8213, GNorm = 1.3354, lr_0 = 7.1767e-04
Loss = 4.5952e-01, PNorm = 49.8330, GNorm = 1.6800, lr_0 = 7.1717e-04
Loss = 4.5687e-01, PNorm = 49.8543, GNorm = 1.6545, lr_0 = 7.1668e-04
Loss = 5.1702e-01, PNorm = 49.8763, GNorm = 1.1283, lr_0 = 7.1619e-04
Loss = 4.3532e-01, PNorm = 49.8967, GNorm = 1.4852, lr_0 = 7.1570e-04
Loss = 4.1344e-01, PNorm = 49.9185, GNorm = 1.7185, lr_0 = 7.1521e-04
Loss = 4.1860e-01, PNorm = 49.9403, GNorm = 2.6749, lr_0 = 7.1472e-04
Loss = 4.5159e-01, PNorm = 49.9472, GNorm = 1.3195, lr_0 = 7.1423e-04
Loss = 5.1517e-01, PNorm = 49.9646, GNorm = 1.0792, lr_0 = 7.1374e-04
Loss = 4.3230e-01, PNorm = 49.9774, GNorm = 0.9623, lr_0 = 7.1325e-04
Loss = 5.0615e-01, PNorm = 50.0042, GNorm = 2.3195, lr_0 = 7.1277e-04
Loss = 4.3773e-01, PNorm = 50.0217, GNorm = 2.5019, lr_0 = 7.1228e-04
Loss = 4.8380e-01, PNorm = 50.0438, GNorm = 2.0007, lr_0 = 7.1179e-04
Loss = 4.8372e-01, PNorm = 50.0718, GNorm = 1.6568, lr_0 = 7.1130e-04
Loss = 4.2030e-01, PNorm = 50.0932, GNorm = 0.8757, lr_0 = 7.1081e-04
Loss = 4.7803e-01, PNorm = 50.1111, GNorm = 2.4147, lr_0 = 7.1033e-04
Loss = 5.1231e-01, PNorm = 50.1269, GNorm = 1.9868, lr_0 = 7.0984e-04
Loss = 4.7796e-01, PNorm = 50.1452, GNorm = 1.0343, lr_0 = 7.0935e-04
Loss = 4.1663e-01, PNorm = 50.1631, GNorm = 1.5504, lr_0 = 7.0887e-04
Loss = 4.3574e-01, PNorm = 50.1820, GNorm = 1.7931, lr_0 = 7.0838e-04
Loss = 3.7938e-01, PNorm = 50.1925, GNorm = 1.1363, lr_0 = 7.0790e-04
Loss = 4.6367e-01, PNorm = 50.2082, GNorm = 1.2975, lr_0 = 7.0741e-04
Loss = 4.5090e-01, PNorm = 50.2204, GNorm = 0.7461, lr_0 = 7.0693e-04
Loss = 4.6844e-01, PNorm = 50.2378, GNorm = 1.4808, lr_0 = 7.0644e-04
Loss = 4.0568e-01, PNorm = 50.2489, GNorm = 1.2478, lr_0 = 7.0596e-04
Loss = 4.7323e-01, PNorm = 50.2611, GNorm = 1.3646, lr_0 = 7.0548e-04
Loss = 4.6816e-01, PNorm = 50.2782, GNorm = 1.2228, lr_0 = 7.0499e-04
Loss = 4.2175e-01, PNorm = 50.2990, GNorm = 1.0516, lr_0 = 7.0451e-04
Loss = 4.6149e-01, PNorm = 50.3085, GNorm = 0.9236, lr_0 = 7.0403e-04
Loss = 4.5728e-01, PNorm = 50.3179, GNorm = 1.6982, lr_0 = 7.0354e-04
Loss = 4.7672e-01, PNorm = 50.3273, GNorm = 1.1363, lr_0 = 7.0306e-04
Loss = 4.6675e-01, PNorm = 50.3407, GNorm = 1.3176, lr_0 = 7.0258e-04
Loss = 4.4061e-01, PNorm = 50.3594, GNorm = 1.1868, lr_0 = 7.0210e-04
Loss = 4.4695e-01, PNorm = 50.3791, GNorm = 1.3228, lr_0 = 7.0162e-04
Loss = 5.0890e-01, PNorm = 50.3954, GNorm = 3.2667, lr_0 = 7.0114e-04
Loss = 4.8664e-01, PNorm = 50.4102, GNorm = 1.2306, lr_0 = 7.0066e-04
Loss = 4.4337e-01, PNorm = 50.4265, GNorm = 1.3254, lr_0 = 7.0018e-04
Loss = 4.1382e-01, PNorm = 50.4402, GNorm = 1.0802, lr_0 = 6.9970e-04
Loss = 4.2000e-01, PNorm = 50.4520, GNorm = 1.4292, lr_0 = 6.9922e-04
Loss = 5.2295e-01, PNorm = 50.4696, GNorm = 1.0672, lr_0 = 6.9874e-04
Loss = 5.6121e-01, PNorm = 50.4878, GNorm = 1.1708, lr_0 = 6.9826e-04
Loss = 4.7842e-01, PNorm = 50.5075, GNorm = 2.3174, lr_0 = 6.9778e-04
Loss = 4.6887e-01, PNorm = 50.5296, GNorm = 1.9608, lr_0 = 6.9730e-04
Loss = 4.3028e-01, PNorm = 50.5479, GNorm = 1.2009, lr_0 = 6.9683e-04
Loss = 4.9456e-01, PNorm = 50.5688, GNorm = 1.0275, lr_0 = 6.9635e-04
Loss = 4.0025e-01, PNorm = 50.5882, GNorm = 0.9734, lr_0 = 6.9587e-04
Loss = 4.4496e-01, PNorm = 50.6129, GNorm = 0.8723, lr_0 = 6.9540e-04
Loss = 4.0846e-01, PNorm = 50.6286, GNorm = 1.2055, lr_0 = 6.9492e-04
Loss = 4.2842e-01, PNorm = 50.6446, GNorm = 1.0871, lr_0 = 6.9444e-04
Loss = 4.4899e-01, PNorm = 50.6542, GNorm = 2.2127, lr_0 = 6.9397e-04
Loss = 4.0719e-01, PNorm = 50.6625, GNorm = 1.1024, lr_0 = 6.9349e-04
Loss = 4.6724e-01, PNorm = 50.6758, GNorm = 1.5548, lr_0 = 6.9302e-04
Loss = 4.9961e-01, PNorm = 50.6930, GNorm = 1.3408, lr_0 = 6.9254e-04
Loss = 4.2891e-01, PNorm = 50.7076, GNorm = 1.7085, lr_0 = 6.9207e-04
Loss = 4.8901e-01, PNorm = 50.7201, GNorm = 1.8581, lr_0 = 6.9159e-04
Loss = 4.6523e-01, PNorm = 50.7411, GNorm = 2.0312, lr_0 = 6.9112e-04
Loss = 4.2310e-01, PNorm = 50.7595, GNorm = 2.2803, lr_0 = 6.9065e-04
Loss = 4.1553e-01, PNorm = 50.7761, GNorm = 1.4223, lr_0 = 6.9017e-04
Loss = 4.4907e-01, PNorm = 50.7902, GNorm = 0.9904, lr_0 = 6.8970e-04
Loss = 4.7980e-01, PNorm = 50.8067, GNorm = 1.3734, lr_0 = 6.8923e-04
Loss = 4.3602e-01, PNorm = 50.8211, GNorm = 1.1290, lr_0 = 6.8876e-04
Loss = 5.0034e-01, PNorm = 50.8401, GNorm = 1.2405, lr_0 = 6.8828e-04
Loss = 4.4994e-01, PNorm = 50.8576, GNorm = 1.3374, lr_0 = 6.8781e-04
Loss = 4.7599e-01, PNorm = 50.8701, GNorm = 2.1073, lr_0 = 6.8734e-04
Loss = 4.3771e-01, PNorm = 50.8842, GNorm = 1.3048, lr_0 = 6.8687e-04
Loss = 4.6625e-01, PNorm = 50.9032, GNorm = 2.2832, lr_0 = 6.8640e-04
Loss = 4.9102e-01, PNorm = 50.9160, GNorm = 1.6689, lr_0 = 6.8593e-04
Loss = 4.4653e-01, PNorm = 50.9287, GNorm = 1.5838, lr_0 = 6.8546e-04
Loss = 4.5230e-01, PNorm = 50.9411, GNorm = 2.0788, lr_0 = 6.8499e-04
Loss = 4.8587e-01, PNorm = 50.9480, GNorm = 2.9822, lr_0 = 6.8452e-04
Loss = 4.3717e-01, PNorm = 50.9669, GNorm = 1.3724, lr_0 = 6.8405e-04
Loss = 4.8265e-01, PNorm = 50.9907, GNorm = 2.0225, lr_0 = 6.8358e-04
Loss = 4.6636e-01, PNorm = 51.0127, GNorm = 1.1845, lr_0 = 6.8312e-04
Loss = 4.6241e-01, PNorm = 51.0305, GNorm = 1.9730, lr_0 = 6.8265e-04
Loss = 4.4068e-01, PNorm = 51.0514, GNorm = 1.9094, lr_0 = 6.8218e-04
Loss = 5.3749e-01, PNorm = 51.0646, GNorm = 0.8941, lr_0 = 6.8171e-04
Loss = 4.6229e-01, PNorm = 51.0830, GNorm = 1.3199, lr_0 = 6.8125e-04
Loss = 4.4315e-01, PNorm = 51.0997, GNorm = 2.0982, lr_0 = 6.8078e-04
Loss = 4.4719e-01, PNorm = 51.1076, GNorm = 1.5947, lr_0 = 6.8031e-04
Loss = 4.3768e-01, PNorm = 51.1210, GNorm = 1.0752, lr_0 = 6.7985e-04
Loss = 3.8240e-01, PNorm = 51.1343, GNorm = 1.0859, lr_0 = 6.7938e-04
Loss = 5.0550e-01, PNorm = 51.1410, GNorm = 2.0590, lr_0 = 6.7892e-04
Loss = 4.6905e-01, PNorm = 51.1516, GNorm = 1.3273, lr_0 = 6.7845e-04
Loss = 4.3232e-01, PNorm = 51.1627, GNorm = 1.1830, lr_0 = 6.7799e-04
Loss = 4.1780e-01, PNorm = 51.1819, GNorm = 2.3040, lr_0 = 6.7752e-04
Loss = 4.1425e-01, PNorm = 51.1957, GNorm = 1.0965, lr_0 = 6.7706e-04
Loss = 4.6629e-01, PNorm = 51.2102, GNorm = 2.4490, lr_0 = 6.7659e-04
Loss = 5.0903e-01, PNorm = 51.2259, GNorm = 1.9281, lr_0 = 6.7613e-04
Loss = 5.0043e-01, PNorm = 51.2398, GNorm = 1.2637, lr_0 = 6.7567e-04
Loss = 3.7692e-01, PNorm = 51.2546, GNorm = 1.7008, lr_0 = 6.7520e-04
Loss = 4.1790e-01, PNorm = 51.2683, GNorm = 1.3391, lr_0 = 6.7474e-04
Loss = 4.9484e-01, PNorm = 51.2770, GNorm = 1.6229, lr_0 = 6.7428e-04
Loss = 4.5501e-01, PNorm = 51.2908, GNorm = 1.8765, lr_0 = 6.7382e-04
Loss = 4.4519e-01, PNorm = 51.3078, GNorm = 1.0907, lr_0 = 6.7335e-04
Loss = 4.4472e-01, PNorm = 51.3263, GNorm = 2.3870, lr_0 = 6.7289e-04
Loss = 4.6639e-01, PNorm = 51.3353, GNorm = 1.4325, lr_0 = 6.7243e-04
Loss = 4.4229e-01, PNorm = 51.3505, GNorm = 1.1431, lr_0 = 6.7197e-04
Loss = 4.1030e-01, PNorm = 51.3674, GNorm = 2.2195, lr_0 = 6.7151e-04
Loss = 5.0968e-01, PNorm = 51.3836, GNorm = 1.6590, lr_0 = 6.7105e-04
Loss = 4.5543e-01, PNorm = 51.3956, GNorm = 2.0797, lr_0 = 6.7059e-04
Loss = 4.0921e-01, PNorm = 51.4085, GNorm = 1.0767, lr_0 = 6.7013e-04
Loss = 5.2183e-01, PNorm = 51.4240, GNorm = 1.5435, lr_0 = 6.6967e-04
Loss = 4.6752e-01, PNorm = 51.4429, GNorm = 2.0691, lr_0 = 6.6921e-04
Loss = 4.4527e-01, PNorm = 51.4563, GNorm = 0.9883, lr_0 = 6.6876e-04
Loss = 4.2622e-01, PNorm = 51.4708, GNorm = 1.3342, lr_0 = 6.6830e-04
Loss = 4.2500e-01, PNorm = 51.4807, GNorm = 1.5231, lr_0 = 6.6784e-04
Loss = 4.8193e-01, PNorm = 51.4920, GNorm = 2.1004, lr_0 = 6.6738e-04
Loss = 4.7090e-01, PNorm = 51.5097, GNorm = 0.8995, lr_0 = 6.6693e-04
Loss = 4.4162e-01, PNorm = 51.5253, GNorm = 2.4367, lr_0 = 6.6647e-04
Loss = 4.3765e-01, PNorm = 51.5407, GNorm = 1.2002, lr_0 = 6.6601e-04
Loss = 4.1135e-01, PNorm = 51.5538, GNorm = 1.0710, lr_0 = 6.6556e-04
Loss = 4.0234e-01, PNorm = 51.5697, GNorm = 2.1663, lr_0 = 6.6510e-04
Loss = 4.3102e-01, PNorm = 51.5881, GNorm = 1.8386, lr_0 = 6.6464e-04
Loss = 5.2574e-01, PNorm = 51.6059, GNorm = 1.2127, lr_0 = 6.6419e-04
Loss = 4.0267e-01, PNorm = 51.6156, GNorm = 1.3318, lr_0 = 6.6373e-04
Loss = 4.4577e-01, PNorm = 51.6244, GNorm = 1.2962, lr_0 = 6.6328e-04
Loss = 4.3002e-01, PNorm = 51.6382, GNorm = 1.0642, lr_0 = 6.6282e-04
Validation mae = 0.118081
Epoch 7
Loss = 3.8391e-01, PNorm = 51.6586, GNorm = 1.9516, lr_0 = 6.6237e-04
Loss = 3.6698e-01, PNorm = 51.6688, GNorm = 1.3348, lr_0 = 6.6192e-04
Loss = 4.4874e-01, PNorm = 51.6851, GNorm = 1.5288, lr_0 = 6.6146e-04
Loss = 4.0431e-01, PNorm = 51.7031, GNorm = 1.5798, lr_0 = 6.6101e-04
Loss = 3.8592e-01, PNorm = 51.7176, GNorm = 1.0725, lr_0 = 6.6056e-04
Loss = 5.3264e-01, PNorm = 51.7318, GNorm = 1.1424, lr_0 = 6.6011e-04
Loss = 5.1787e-01, PNorm = 51.7574, GNorm = 2.0786, lr_0 = 6.5965e-04
Loss = 4.8703e-01, PNorm = 51.7690, GNorm = 2.1255, lr_0 = 6.5920e-04
Loss = 4.3541e-01, PNorm = 51.7817, GNorm = 1.0866, lr_0 = 6.5875e-04
Loss = 4.2883e-01, PNorm = 51.7910, GNorm = 0.9850, lr_0 = 6.5830e-04
Loss = 4.9571e-01, PNorm = 51.7995, GNorm = 0.8319, lr_0 = 6.5785e-04
Loss = 4.4335e-01, PNorm = 51.8141, GNorm = 1.1983, lr_0 = 6.5740e-04
Loss = 4.6598e-01, PNorm = 51.8284, GNorm = 0.9719, lr_0 = 6.5695e-04
Loss = 4.4157e-01, PNorm = 51.8432, GNorm = 1.1777, lr_0 = 6.5650e-04
Loss = 4.6537e-01, PNorm = 51.8601, GNorm = 1.1246, lr_0 = 6.5605e-04
Loss = 4.5010e-01, PNorm = 51.8822, GNorm = 1.6744, lr_0 = 6.5560e-04
Loss = 4.5030e-01, PNorm = 51.8934, GNorm = 1.2481, lr_0 = 6.5515e-04
Loss = 4.0023e-01, PNorm = 51.9089, GNorm = 0.9394, lr_0 = 6.5470e-04
Loss = 4.3893e-01, PNorm = 51.9267, GNorm = 1.2899, lr_0 = 6.5425e-04
Loss = 4.2941e-01, PNorm = 51.9393, GNorm = 1.0499, lr_0 = 6.5380e-04
Loss = 4.0598e-01, PNorm = 51.9491, GNorm = 0.8973, lr_0 = 6.5335e-04
Loss = 5.4480e-01, PNorm = 51.9538, GNorm = 2.3596, lr_0 = 6.5291e-04
Loss = 4.2334e-01, PNorm = 51.9724, GNorm = 1.3819, lr_0 = 6.5246e-04
Loss = 4.3505e-01, PNorm = 51.9866, GNorm = 1.3234, lr_0 = 6.5201e-04
Loss = 4.1548e-01, PNorm = 52.0066, GNorm = 1.7766, lr_0 = 6.5157e-04
Loss = 5.0310e-01, PNorm = 52.0179, GNorm = 1.0781, lr_0 = 6.5112e-04
Loss = 4.0459e-01, PNorm = 52.0401, GNorm = 1.2831, lr_0 = 6.5067e-04
Loss = 4.7157e-01, PNorm = 52.0504, GNorm = 1.6726, lr_0 = 6.5023e-04
Loss = 4.0911e-01, PNorm = 52.0602, GNorm = 1.5997, lr_0 = 6.4978e-04
Loss = 4.4022e-01, PNorm = 52.0761, GNorm = 1.0836, lr_0 = 6.4934e-04
Loss = 5.6618e-01, PNorm = 52.0975, GNorm = 1.2049, lr_0 = 6.4889e-04
Loss = 4.3640e-01, PNorm = 52.1186, GNorm = 1.5978, lr_0 = 6.4845e-04
Loss = 4.2889e-01, PNorm = 52.1366, GNorm = 1.8912, lr_0 = 6.4800e-04
Loss = 4.3231e-01, PNorm = 52.1483, GNorm = 1.8690, lr_0 = 6.4756e-04
Loss = 3.9066e-01, PNorm = 52.1605, GNorm = 1.2991, lr_0 = 6.4712e-04
Loss = 4.9960e-01, PNorm = 52.1747, GNorm = 1.6012, lr_0 = 6.4667e-04
Loss = 4.8595e-01, PNorm = 52.1911, GNorm = 0.9226, lr_0 = 6.4623e-04
Loss = 4.8001e-01, PNorm = 52.2086, GNorm = 1.9485, lr_0 = 6.4579e-04
Loss = 4.2300e-01, PNorm = 52.2225, GNorm = 1.4925, lr_0 = 6.4534e-04
Loss = 3.9019e-01, PNorm = 52.2283, GNorm = 1.3010, lr_0 = 6.4490e-04
Loss = 4.7828e-01, PNorm = 52.2408, GNorm = 1.5152, lr_0 = 6.4446e-04
Loss = 4.7894e-01, PNorm = 52.2590, GNorm = 1.9651, lr_0 = 6.4402e-04
Loss = 4.0655e-01, PNorm = 52.2719, GNorm = 1.4770, lr_0 = 6.4358e-04
Loss = 3.8690e-01, PNorm = 52.2894, GNorm = 2.1156, lr_0 = 6.4314e-04
Loss = 4.3577e-01, PNorm = 52.2955, GNorm = 1.4215, lr_0 = 6.4270e-04
Loss = 4.2134e-01, PNorm = 52.2997, GNorm = 1.4297, lr_0 = 6.4226e-04
Loss = 4.5425e-01, PNorm = 52.3149, GNorm = 1.6117, lr_0 = 6.4182e-04
Loss = 4.4175e-01, PNorm = 52.3302, GNorm = 2.2515, lr_0 = 6.4138e-04
Loss = 4.6636e-01, PNorm = 52.3357, GNorm = 1.8790, lr_0 = 6.4094e-04
Loss = 4.7992e-01, PNorm = 52.3498, GNorm = 1.3773, lr_0 = 6.4050e-04
Loss = 4.5802e-01, PNorm = 52.3669, GNorm = 2.0352, lr_0 = 6.4006e-04
Loss = 4.3102e-01, PNorm = 52.3724, GNorm = 1.4385, lr_0 = 6.3962e-04
Loss = 4.9494e-01, PNorm = 52.3852, GNorm = 1.6865, lr_0 = 6.3918e-04
Loss = 4.5913e-01, PNorm = 52.4050, GNorm = 1.1453, lr_0 = 6.3874e-04
Loss = 4.2691e-01, PNorm = 52.4276, GNorm = 1.0695, lr_0 = 6.3831e-04
Loss = 3.6880e-01, PNorm = 52.4445, GNorm = 1.0647, lr_0 = 6.3787e-04
Loss = 4.1354e-01, PNorm = 52.4558, GNorm = 3.0039, lr_0 = 6.3743e-04
Loss = 4.3625e-01, PNorm = 52.4668, GNorm = 1.1910, lr_0 = 6.3700e-04
Loss = 5.0821e-01, PNorm = 52.4801, GNorm = 2.1489, lr_0 = 6.3656e-04
Loss = 4.0009e-01, PNorm = 52.4963, GNorm = 1.5796, lr_0 = 6.3612e-04
Loss = 4.1600e-01, PNorm = 52.5097, GNorm = 1.5078, lr_0 = 6.3569e-04
Loss = 3.8883e-01, PNorm = 52.5176, GNorm = 0.8061, lr_0 = 6.3525e-04
Loss = 4.2641e-01, PNorm = 52.5279, GNorm = 1.4708, lr_0 = 6.3482e-04
Loss = 4.6814e-01, PNorm = 52.5391, GNorm = 2.0416, lr_0 = 6.3438e-04
Loss = 3.9997e-01, PNorm = 52.5490, GNorm = 1.3700, lr_0 = 6.3395e-04
Loss = 4.5917e-01, PNorm = 52.5589, GNorm = 1.6448, lr_0 = 6.3351e-04
Loss = 4.5735e-01, PNorm = 52.5780, GNorm = 2.3861, lr_0 = 6.3308e-04
Loss = 5.1454e-01, PNorm = 52.5946, GNorm = 1.3162, lr_0 = 6.3265e-04
Loss = 3.8151e-01, PNorm = 52.6059, GNorm = 1.7151, lr_0 = 6.3221e-04
Loss = 5.5319e-01, PNorm = 52.6184, GNorm = 2.2665, lr_0 = 6.3178e-04
Loss = 4.4516e-01, PNorm = 52.6393, GNorm = 1.3435, lr_0 = 6.3135e-04
Loss = 4.9455e-01, PNorm = 52.6561, GNorm = 1.4217, lr_0 = 6.3091e-04
Loss = 4.3766e-01, PNorm = 52.6669, GNorm = 1.6444, lr_0 = 6.3048e-04
Loss = 4.3574e-01, PNorm = 52.6806, GNorm = 0.9018, lr_0 = 6.3005e-04
Loss = 4.1971e-01, PNorm = 52.6921, GNorm = 1.4360, lr_0 = 6.2962e-04
Loss = 4.7875e-01, PNorm = 52.6982, GNorm = 2.0736, lr_0 = 6.2919e-04
Loss = 3.8408e-01, PNorm = 52.7147, GNorm = 1.4311, lr_0 = 6.2876e-04
Loss = 4.2495e-01, PNorm = 52.7245, GNorm = 1.2476, lr_0 = 6.2833e-04
Loss = 4.4636e-01, PNorm = 52.7367, GNorm = 1.4211, lr_0 = 6.2789e-04
Loss = 4.7662e-01, PNorm = 52.7524, GNorm = 1.6752, lr_0 = 6.2746e-04
Loss = 4.1249e-01, PNorm = 52.7673, GNorm = 1.0611, lr_0 = 6.2703e-04
Loss = 4.7423e-01, PNorm = 52.7818, GNorm = 0.9688, lr_0 = 6.2661e-04
Loss = 4.3698e-01, PNorm = 52.7914, GNorm = 1.1550, lr_0 = 6.2618e-04
Loss = 4.9999e-01, PNorm = 52.8032, GNorm = 1.5056, lr_0 = 6.2575e-04
Loss = 4.8262e-01, PNorm = 52.8135, GNorm = 1.4436, lr_0 = 6.2532e-04
Loss = 4.1464e-01, PNorm = 52.8265, GNorm = 1.1264, lr_0 = 6.2489e-04
Loss = 4.4631e-01, PNorm = 52.8356, GNorm = 1.7338, lr_0 = 6.2446e-04
Loss = 4.1399e-01, PNorm = 52.8508, GNorm = 2.4247, lr_0 = 6.2403e-04
Loss = 4.6268e-01, PNorm = 52.8669, GNorm = 1.5709, lr_0 = 6.2361e-04
Loss = 4.4463e-01, PNorm = 52.8839, GNorm = 1.4450, lr_0 = 6.2318e-04
Loss = 4.9363e-01, PNorm = 52.8985, GNorm = 1.7495, lr_0 = 6.2275e-04
Loss = 4.7180e-01, PNorm = 52.9111, GNorm = 1.0727, lr_0 = 6.2233e-04
Loss = 4.0781e-01, PNorm = 52.9302, GNorm = 1.6072, lr_0 = 6.2190e-04
Loss = 4.6997e-01, PNorm = 52.9416, GNorm = 3.1573, lr_0 = 6.2147e-04
Loss = 4.0988e-01, PNorm = 52.9604, GNorm = 1.1981, lr_0 = 6.2105e-04
Loss = 4.6679e-01, PNorm = 52.9704, GNorm = 2.2800, lr_0 = 6.2062e-04
Loss = 3.8957e-01, PNorm = 52.9810, GNorm = 1.3338, lr_0 = 6.2020e-04
Loss = 4.1076e-01, PNorm = 52.9872, GNorm = 1.6469, lr_0 = 6.1977e-04
Loss = 4.1915e-01, PNorm = 52.9996, GNorm = 1.1669, lr_0 = 6.1935e-04
Loss = 5.0686e-01, PNorm = 53.0173, GNorm = 2.5715, lr_0 = 6.1892e-04
Loss = 4.2053e-01, PNorm = 53.0321, GNorm = 1.1748, lr_0 = 6.1850e-04
Loss = 4.6668e-01, PNorm = 53.0467, GNorm = 1.4634, lr_0 = 6.1808e-04
Loss = 5.1848e-01, PNorm = 53.0631, GNorm = 1.4450, lr_0 = 6.1765e-04
Loss = 4.1249e-01, PNorm = 53.0810, GNorm = 1.3537, lr_0 = 6.1723e-04
Loss = 4.0554e-01, PNorm = 53.0914, GNorm = 1.3501, lr_0 = 6.1681e-04
Loss = 4.2422e-01, PNorm = 53.0983, GNorm = 1.3438, lr_0 = 6.1638e-04
Loss = 4.7690e-01, PNorm = 53.1124, GNorm = 1.3557, lr_0 = 6.1596e-04
Loss = 4.6863e-01, PNorm = 53.1302, GNorm = 1.1294, lr_0 = 6.1554e-04
Loss = 4.5345e-01, PNorm = 53.1428, GNorm = 2.4404, lr_0 = 6.1512e-04
Loss = 4.0607e-01, PNorm = 53.1552, GNorm = 0.9928, lr_0 = 6.1470e-04
Loss = 5.2000e-01, PNorm = 53.1691, GNorm = 1.4599, lr_0 = 6.1428e-04
Loss = 4.4738e-01, PNorm = 53.1822, GNorm = 1.4828, lr_0 = 6.1385e-04
Loss = 4.7735e-01, PNorm = 53.1904, GNorm = 1.7517, lr_0 = 6.1343e-04
Loss = 4.7735e-01, PNorm = 53.2047, GNorm = 1.9065, lr_0 = 6.1301e-04
Loss = 4.1592e-01, PNorm = 53.2183, GNorm = 1.2784, lr_0 = 6.1259e-04
Loss = 4.4382e-01, PNorm = 53.2282, GNorm = 1.7895, lr_0 = 6.1217e-04
Loss = 4.4740e-01, PNorm = 53.2385, GNorm = 2.2042, lr_0 = 6.1175e-04
Loss = 4.0136e-01, PNorm = 53.2526, GNorm = 1.5529, lr_0 = 6.1134e-04
Loss = 3.8840e-01, PNorm = 53.2629, GNorm = 1.0977, lr_0 = 6.1092e-04
Loss = 3.4866e-01, PNorm = 53.2719, GNorm = 1.4687, lr_0 = 6.1050e-04
Validation mae = 0.117183
Epoch 8
Loss = 4.1547e-01, PNorm = 53.2821, GNorm = 1.0146, lr_0 = 6.1008e-04
Loss = 3.6662e-01, PNorm = 53.2891, GNorm = 1.5691, lr_0 = 6.0966e-04
Loss = 3.7649e-01, PNorm = 53.3004, GNorm = 0.8165, lr_0 = 6.0924e-04
Loss = 4.2672e-01, PNorm = 53.3118, GNorm = 1.4026, lr_0 = 6.0883e-04
Loss = 4.0318e-01, PNorm = 53.3272, GNorm = 1.0109, lr_0 = 6.0841e-04
Loss = 3.9741e-01, PNorm = 53.3368, GNorm = 1.4523, lr_0 = 6.0799e-04
Loss = 4.0017e-01, PNorm = 53.3430, GNorm = 1.5562, lr_0 = 6.0758e-04
Loss = 4.1815e-01, PNorm = 53.3535, GNorm = 1.5050, lr_0 = 6.0716e-04
Loss = 4.1584e-01, PNorm = 53.3661, GNorm = 1.2151, lr_0 = 6.0674e-04
Loss = 4.5043e-01, PNorm = 53.3749, GNorm = 1.5156, lr_0 = 6.0633e-04
Loss = 4.2437e-01, PNorm = 53.3857, GNorm = 1.4963, lr_0 = 6.0591e-04
Loss = 4.1328e-01, PNorm = 53.4017, GNorm = 1.4510, lr_0 = 6.0550e-04
Loss = 4.2848e-01, PNorm = 53.4152, GNorm = 1.3269, lr_0 = 6.0508e-04
Loss = 4.1032e-01, PNorm = 53.4248, GNorm = 1.0901, lr_0 = 6.0467e-04
Loss = 4.2254e-01, PNorm = 53.4405, GNorm = 1.1763, lr_0 = 6.0425e-04
Loss = 4.3712e-01, PNorm = 53.4585, GNorm = 1.6810, lr_0 = 6.0384e-04
Loss = 4.3055e-01, PNorm = 53.4687, GNorm = 1.7678, lr_0 = 6.0343e-04
Loss = 4.5980e-01, PNorm = 53.4796, GNorm = 0.9943, lr_0 = 6.0301e-04
Loss = 3.9985e-01, PNorm = 53.4915, GNorm = 1.2134, lr_0 = 6.0260e-04
Loss = 3.5833e-01, PNorm = 53.5003, GNorm = 0.9519, lr_0 = 6.0219e-04
Loss = 4.5700e-01, PNorm = 53.5089, GNorm = 1.5614, lr_0 = 6.0178e-04
Loss = 4.2012e-01, PNorm = 53.5267, GNorm = 1.0071, lr_0 = 6.0136e-04
Loss = 4.4058e-01, PNorm = 53.5454, GNorm = 1.6045, lr_0 = 6.0095e-04
Loss = 4.8465e-01, PNorm = 53.5564, GNorm = 1.1490, lr_0 = 6.0054e-04
Loss = 4.8492e-01, PNorm = 53.5684, GNorm = 1.4901, lr_0 = 6.0013e-04
Loss = 4.3126e-01, PNorm = 53.5784, GNorm = 0.9154, lr_0 = 5.9972e-04
Loss = 4.5318e-01, PNorm = 53.5924, GNorm = 1.2388, lr_0 = 5.9931e-04
Loss = 4.6697e-01, PNorm = 53.6078, GNorm = 1.1480, lr_0 = 5.9890e-04
Loss = 4.3868e-01, PNorm = 53.6199, GNorm = 2.9193, lr_0 = 5.9849e-04
Loss = 4.1863e-01, PNorm = 53.6300, GNorm = 1.6297, lr_0 = 5.9808e-04
Loss = 4.5718e-01, PNorm = 53.6434, GNorm = 1.5778, lr_0 = 5.9767e-04
Loss = 4.5568e-01, PNorm = 53.6575, GNorm = 1.7806, lr_0 = 5.9726e-04
Loss = 4.6273e-01, PNorm = 53.6673, GNorm = 1.8611, lr_0 = 5.9685e-04
Loss = 4.9148e-01, PNorm = 53.6806, GNorm = 1.5112, lr_0 = 5.9644e-04
Loss = 3.8275e-01, PNorm = 53.6966, GNorm = 1.6635, lr_0 = 5.9603e-04
Loss = 4.8300e-01, PNorm = 53.7087, GNorm = 1.3735, lr_0 = 5.9562e-04
Loss = 5.0445e-01, PNorm = 53.7218, GNorm = 1.9004, lr_0 = 5.9521e-04
Loss = 4.6779e-01, PNorm = 53.7376, GNorm = 2.3742, lr_0 = 5.9481e-04
Loss = 4.2800e-01, PNorm = 53.7499, GNorm = 1.5022, lr_0 = 5.9440e-04
Loss = 4.6433e-01, PNorm = 53.7647, GNorm = 1.4607, lr_0 = 5.9399e-04
Loss = 4.1297e-01, PNorm = 53.7786, GNorm = 0.9901, lr_0 = 5.9358e-04
Loss = 3.6542e-01, PNorm = 53.7879, GNorm = 1.0979, lr_0 = 5.9318e-04
Loss = 4.1039e-01, PNorm = 53.7968, GNorm = 1.7241, lr_0 = 5.9277e-04
Loss = 3.7001e-01, PNorm = 53.8096, GNorm = 1.2127, lr_0 = 5.9236e-04
Loss = 4.2978e-01, PNorm = 53.8205, GNorm = 1.3401, lr_0 = 5.9196e-04
Loss = 3.9518e-01, PNorm = 53.8326, GNorm = 1.3798, lr_0 = 5.9155e-04
Loss = 4.4003e-01, PNorm = 53.8439, GNorm = 1.6482, lr_0 = 5.9115e-04
Loss = 3.8976e-01, PNorm = 53.8560, GNorm = 1.9258, lr_0 = 5.9074e-04
Loss = 5.0127e-01, PNorm = 53.8754, GNorm = 1.6774, lr_0 = 5.9034e-04
Loss = 4.1239e-01, PNorm = 53.8899, GNorm = 0.9662, lr_0 = 5.8993e-04
Loss = 4.2888e-01, PNorm = 53.9069, GNorm = 1.3193, lr_0 = 5.8953e-04
Loss = 4.3234e-01, PNorm = 53.9209, GNorm = 1.8330, lr_0 = 5.8913e-04
Loss = 4.2664e-01, PNorm = 53.9335, GNorm = 1.4326, lr_0 = 5.8872e-04
Loss = 4.2659e-01, PNorm = 53.9472, GNorm = 1.1600, lr_0 = 5.8832e-04
Loss = 4.6470e-01, PNorm = 53.9522, GNorm = 0.9131, lr_0 = 5.8792e-04
Loss = 3.8874e-01, PNorm = 53.9591, GNorm = 1.0676, lr_0 = 5.8751e-04
Loss = 4.2531e-01, PNorm = 53.9739, GNorm = 0.9459, lr_0 = 5.8711e-04
Loss = 4.3013e-01, PNorm = 53.9782, GNorm = 2.1145, lr_0 = 5.8671e-04
Loss = 4.3991e-01, PNorm = 53.9857, GNorm = 0.9911, lr_0 = 5.8631e-04
Loss = 4.3841e-01, PNorm = 53.9967, GNorm = 1.3008, lr_0 = 5.8591e-04
Loss = 4.7186e-01, PNorm = 53.9999, GNorm = 2.7863, lr_0 = 5.8550e-04
Loss = 3.9747e-01, PNorm = 54.0094, GNorm = 1.2427, lr_0 = 5.8510e-04
Loss = 5.3265e-01, PNorm = 54.0255, GNorm = 1.3512, lr_0 = 5.8470e-04
Loss = 4.6706e-01, PNorm = 54.0397, GNorm = 1.8131, lr_0 = 5.8430e-04
Loss = 4.6895e-01, PNorm = 54.0502, GNorm = 1.7352, lr_0 = 5.8390e-04
Loss = 4.3856e-01, PNorm = 54.0659, GNorm = 1.0009, lr_0 = 5.8350e-04
Loss = 3.9207e-01, PNorm = 54.0778, GNorm = 0.8702, lr_0 = 5.8310e-04
Loss = 3.8126e-01, PNorm = 54.0890, GNorm = 1.3357, lr_0 = 5.8270e-04
Loss = 4.8751e-01, PNorm = 54.1127, GNorm = 1.4862, lr_0 = 5.8230e-04
Loss = 4.7020e-01, PNorm = 54.1267, GNorm = 1.4303, lr_0 = 5.8190e-04
Loss = 4.4002e-01, PNorm = 54.1322, GNorm = 2.0346, lr_0 = 5.8151e-04
Loss = 3.3081e-01, PNorm = 54.1451, GNorm = 1.3485, lr_0 = 5.8111e-04
Loss = 4.0257e-01, PNorm = 54.1570, GNorm = 1.3768, lr_0 = 5.8071e-04
Loss = 4.3975e-01, PNorm = 54.1692, GNorm = 2.2623, lr_0 = 5.8031e-04
Loss = 4.6626e-01, PNorm = 54.1779, GNorm = 1.2872, lr_0 = 5.7991e-04
Loss = 4.0848e-01, PNorm = 54.1829, GNorm = 1.0212, lr_0 = 5.7952e-04
Loss = 5.0756e-01, PNorm = 54.1904, GNorm = 2.5096, lr_0 = 5.7912e-04
Loss = 4.5903e-01, PNorm = 54.2010, GNorm = 1.9488, lr_0 = 5.7872e-04
Loss = 4.5473e-01, PNorm = 54.2137, GNorm = 1.2499, lr_0 = 5.7833e-04
Loss = 4.4830e-01, PNorm = 54.2242, GNorm = 1.3078, lr_0 = 5.7793e-04
Loss = 4.4356e-01, PNorm = 54.2307, GNorm = 1.2908, lr_0 = 5.7753e-04
Loss = 4.1009e-01, PNorm = 54.2439, GNorm = 1.2601, lr_0 = 5.7714e-04
Loss = 4.5806e-01, PNorm = 54.2552, GNorm = 1.2837, lr_0 = 5.7674e-04
Loss = 4.1005e-01, PNorm = 54.2629, GNorm = 1.6020, lr_0 = 5.7635e-04
Loss = 4.6164e-01, PNorm = 54.2703, GNorm = 0.8553, lr_0 = 5.7595e-04
Loss = 4.3630e-01, PNorm = 54.2855, GNorm = 0.9741, lr_0 = 5.7556e-04
Loss = 4.6069e-01, PNorm = 54.2942, GNorm = 1.8114, lr_0 = 5.7516e-04
Loss = 4.0171e-01, PNorm = 54.3057, GNorm = 1.5876, lr_0 = 5.7477e-04
Loss = 4.8448e-01, PNorm = 54.3150, GNorm = 2.5366, lr_0 = 5.7438e-04
Loss = 4.4715e-01, PNorm = 54.3190, GNorm = 1.1748, lr_0 = 5.7398e-04
Loss = 4.6402e-01, PNorm = 54.3251, GNorm = 1.4360, lr_0 = 5.7359e-04
Loss = 3.9630e-01, PNorm = 54.3289, GNorm = 1.4003, lr_0 = 5.7320e-04
Loss = 4.2297e-01, PNorm = 54.3320, GNorm = 0.7522, lr_0 = 5.7280e-04
Loss = 4.7235e-01, PNorm = 54.3446, GNorm = 1.5457, lr_0 = 5.7241e-04
Loss = 4.3005e-01, PNorm = 54.3597, GNorm = 1.2667, lr_0 = 5.7202e-04
Loss = 4.3653e-01, PNorm = 54.3681, GNorm = 1.9322, lr_0 = 5.7163e-04
Loss = 4.6350e-01, PNorm = 54.3796, GNorm = 1.8234, lr_0 = 5.7124e-04
Loss = 3.7867e-01, PNorm = 54.3863, GNorm = 1.2440, lr_0 = 5.7084e-04
Loss = 3.9715e-01, PNorm = 54.4033, GNorm = 1.4390, lr_0 = 5.7045e-04
Loss = 4.5842e-01, PNorm = 54.4134, GNorm = 1.4099, lr_0 = 5.7006e-04
Loss = 4.5450e-01, PNorm = 54.4279, GNorm = 1.5777, lr_0 = 5.6967e-04
Loss = 4.0670e-01, PNorm = 54.4464, GNorm = 3.1556, lr_0 = 5.6928e-04
Loss = 4.6217e-01, PNorm = 54.4637, GNorm = 1.6605, lr_0 = 5.6889e-04
Loss = 3.8820e-01, PNorm = 54.4808, GNorm = 1.3045, lr_0 = 5.6850e-04
Loss = 4.5419e-01, PNorm = 54.4916, GNorm = 1.2218, lr_0 = 5.6811e-04
Loss = 4.2417e-01, PNorm = 54.5084, GNorm = 1.9770, lr_0 = 5.6772e-04
Loss = 3.8779e-01, PNorm = 54.5198, GNorm = 1.0651, lr_0 = 5.6733e-04
Loss = 4.7588e-01, PNorm = 54.5256, GNorm = 0.8196, lr_0 = 5.6695e-04
Loss = 4.1991e-01, PNorm = 54.5403, GNorm = 0.9884, lr_0 = 5.6656e-04
Loss = 4.2183e-01, PNorm = 54.5562, GNorm = 1.3080, lr_0 = 5.6617e-04
Loss = 4.1387e-01, PNorm = 54.5647, GNorm = 1.6002, lr_0 = 5.6578e-04
Loss = 4.2791e-01, PNorm = 54.5779, GNorm = 2.2752, lr_0 = 5.6539e-04
Loss = 4.4776e-01, PNorm = 54.5903, GNorm = 1.3296, lr_0 = 5.6501e-04
Loss = 4.2737e-01, PNorm = 54.6010, GNorm = 1.0073, lr_0 = 5.6462e-04
Loss = 3.7742e-01, PNorm = 54.6181, GNorm = 1.3475, lr_0 = 5.6423e-04
Loss = 4.3699e-01, PNorm = 54.6278, GNorm = 1.8594, lr_0 = 5.6385e-04
Loss = 4.4047e-01, PNorm = 54.6430, GNorm = 1.2445, lr_0 = 5.6346e-04
Loss = 4.3138e-01, PNorm = 54.6584, GNorm = 1.2380, lr_0 = 5.6307e-04
Loss = 4.0518e-01, PNorm = 54.6738, GNorm = 1.2762, lr_0 = 5.6269e-04
Loss = 4.1768e-01, PNorm = 54.6836, GNorm = 1.1986, lr_0 = 5.6230e-04
Validation mae = 0.115338
Epoch 9
Loss = 4.1682e-01, PNorm = 54.6956, GNorm = 1.6943, lr_0 = 5.6192e-04
Loss = 4.0407e-01, PNorm = 54.7127, GNorm = 1.8689, lr_0 = 5.6153e-04
Loss = 3.8073e-01, PNorm = 54.7299, GNorm = 1.3768, lr_0 = 5.6115e-04
Loss = 5.6377e-01, PNorm = 54.7456, GNorm = 1.2517, lr_0 = 5.6076e-04
Loss = 3.8754e-01, PNorm = 54.7608, GNorm = 1.0710, lr_0 = 5.6038e-04
Loss = 3.7734e-01, PNorm = 54.7716, GNorm = 1.1432, lr_0 = 5.6000e-04
Loss = 3.8945e-01, PNorm = 54.7857, GNorm = 1.2223, lr_0 = 5.5961e-04
Loss = 3.8953e-01, PNorm = 54.8018, GNorm = 0.9678, lr_0 = 5.5923e-04
Loss = 4.0669e-01, PNorm = 54.8156, GNorm = 1.7234, lr_0 = 5.5885e-04
Loss = 4.5358e-01, PNorm = 54.8216, GNorm = 1.2374, lr_0 = 5.5846e-04
Loss = 4.6618e-01, PNorm = 54.8328, GNorm = 1.3542, lr_0 = 5.5808e-04
Loss = 4.4567e-01, PNorm = 54.8447, GNorm = 2.3604, lr_0 = 5.5770e-04
Loss = 4.3348e-01, PNorm = 54.8557, GNorm = 2.1826, lr_0 = 5.5732e-04
Loss = 4.6200e-01, PNorm = 54.8725, GNorm = 1.0014, lr_0 = 5.5693e-04
Loss = 4.1402e-01, PNorm = 54.8863, GNorm = 1.2280, lr_0 = 5.5655e-04
Loss = 3.8478e-01, PNorm = 54.8960, GNorm = 1.1510, lr_0 = 5.5617e-04
Loss = 4.0197e-01, PNorm = 54.9014, GNorm = 1.6857, lr_0 = 5.5579e-04
Loss = 4.2187e-01, PNorm = 54.9125, GNorm = 1.1827, lr_0 = 5.5541e-04
Loss = 4.0847e-01, PNorm = 54.9236, GNorm = 1.3574, lr_0 = 5.5503e-04
Loss = 4.1098e-01, PNorm = 54.9332, GNorm = 1.5314, lr_0 = 5.5465e-04
Loss = 3.9583e-01, PNorm = 54.9398, GNorm = 1.1489, lr_0 = 5.5427e-04
Loss = 4.0085e-01, PNorm = 54.9464, GNorm = 2.1584, lr_0 = 5.5389e-04
Loss = 3.8954e-01, PNorm = 54.9623, GNorm = 2.0249, lr_0 = 5.5351e-04
Loss = 4.3984e-01, PNorm = 54.9734, GNorm = 2.1699, lr_0 = 5.5313e-04
Loss = 4.5090e-01, PNorm = 54.9813, GNorm = 1.6065, lr_0 = 5.5275e-04
Loss = 4.1186e-01, PNorm = 54.9983, GNorm = 1.3249, lr_0 = 5.5237e-04
Loss = 4.6361e-01, PNorm = 55.0132, GNorm = 1.5656, lr_0 = 5.5199e-04
Loss = 4.4368e-01, PNorm = 55.0280, GNorm = 1.4950, lr_0 = 5.5162e-04
Loss = 4.8362e-01, PNorm = 55.0395, GNorm = 1.4668, lr_0 = 5.5124e-04
Loss = 4.7843e-01, PNorm = 55.0476, GNorm = 1.9235, lr_0 = 5.5086e-04
Loss = 4.2340e-01, PNorm = 55.0618, GNorm = 1.3357, lr_0 = 5.5048e-04
Loss = 3.9150e-01, PNorm = 55.0758, GNorm = 1.2837, lr_0 = 5.5011e-04
Loss = 3.8544e-01, PNorm = 55.0821, GNorm = 1.2522, lr_0 = 5.4973e-04
Loss = 4.5903e-01, PNorm = 55.0938, GNorm = 1.5169, lr_0 = 5.4935e-04
Loss = 4.5513e-01, PNorm = 55.1020, GNorm = 0.8039, lr_0 = 5.4898e-04
Loss = 4.0981e-01, PNorm = 55.1087, GNorm = 1.1130, lr_0 = 5.4860e-04
Loss = 4.0777e-01, PNorm = 55.1170, GNorm = 1.0027, lr_0 = 5.4822e-04
Loss = 3.7358e-01, PNorm = 55.1303, GNorm = 1.4820, lr_0 = 5.4785e-04
Loss = 4.4541e-01, PNorm = 55.1452, GNorm = 1.4088, lr_0 = 5.4747e-04
Loss = 4.0084e-01, PNorm = 55.1562, GNorm = 1.2279, lr_0 = 5.4710e-04
Loss = 4.4362e-01, PNorm = 55.1670, GNorm = 1.2950, lr_0 = 5.4672e-04
Loss = 3.9969e-01, PNorm = 55.1837, GNorm = 1.6048, lr_0 = 5.4635e-04
Loss = 4.3127e-01, PNorm = 55.1898, GNorm = 1.6313, lr_0 = 5.4597e-04
Loss = 4.3694e-01, PNorm = 55.2027, GNorm = 1.2774, lr_0 = 5.4560e-04
Loss = 4.2669e-01, PNorm = 55.2140, GNorm = 0.9243, lr_0 = 5.4523e-04
Loss = 4.4083e-01, PNorm = 55.2256, GNorm = 1.2896, lr_0 = 5.4485e-04
Loss = 4.3198e-01, PNorm = 55.2331, GNorm = 1.2644, lr_0 = 5.4448e-04
Loss = 3.9704e-01, PNorm = 55.2381, GNorm = 1.0285, lr_0 = 5.4411e-04
Loss = 4.4526e-01, PNorm = 55.2461, GNorm = 1.1446, lr_0 = 5.4373e-04
Loss = 4.8393e-01, PNorm = 55.2522, GNorm = 1.8932, lr_0 = 5.4336e-04
Loss = 4.5147e-01, PNorm = 55.2580, GNorm = 1.3541, lr_0 = 5.4299e-04
Loss = 4.2673e-01, PNorm = 55.2692, GNorm = 2.1286, lr_0 = 5.4262e-04
Loss = 3.8962e-01, PNorm = 55.2815, GNorm = 1.2862, lr_0 = 5.4225e-04
Loss = 4.0700e-01, PNorm = 55.2999, GNorm = 1.4629, lr_0 = 5.4187e-04
Loss = 4.4102e-01, PNorm = 55.3080, GNorm = 0.9737, lr_0 = 5.4150e-04
Loss = 4.3391e-01, PNorm = 55.3166, GNorm = 1.2032, lr_0 = 5.4113e-04
Loss = 4.0177e-01, PNorm = 55.3288, GNorm = 1.4569, lr_0 = 5.4076e-04
Loss = 4.5943e-01, PNorm = 55.3421, GNorm = 2.4040, lr_0 = 5.4039e-04
Loss = 4.7468e-01, PNorm = 55.3521, GNorm = 1.4060, lr_0 = 5.4002e-04
Loss = 3.7617e-01, PNorm = 55.3581, GNorm = 1.0930, lr_0 = 5.3965e-04
Loss = 4.4085e-01, PNorm = 55.3680, GNorm = 1.2411, lr_0 = 5.3928e-04
Loss = 3.8032e-01, PNorm = 55.3784, GNorm = 1.3283, lr_0 = 5.3891e-04
Loss = 3.9133e-01, PNorm = 55.3901, GNorm = 0.9160, lr_0 = 5.3854e-04
Loss = 4.4329e-01, PNorm = 55.3972, GNorm = 1.5892, lr_0 = 5.3817e-04
Loss = 3.6878e-01, PNorm = 55.4018, GNorm = 1.0266, lr_0 = 5.3781e-04
Loss = 4.3494e-01, PNorm = 55.4097, GNorm = 1.1274, lr_0 = 5.3744e-04
Loss = 4.3685e-01, PNorm = 55.4205, GNorm = 1.1331, lr_0 = 5.3707e-04
Loss = 3.5736e-01, PNorm = 55.4297, GNorm = 0.6623, lr_0 = 5.3670e-04
Loss = 4.2408e-01, PNorm = 55.4364, GNorm = 1.2625, lr_0 = 5.3633e-04
Loss = 4.1597e-01, PNorm = 55.4461, GNorm = 1.3232, lr_0 = 5.3597e-04
Loss = 4.3829e-01, PNorm = 55.4598, GNorm = 1.4750, lr_0 = 5.3560e-04
Loss = 4.1605e-01, PNorm = 55.4753, GNorm = 1.8689, lr_0 = 5.3523e-04
Loss = 4.0589e-01, PNorm = 55.4863, GNorm = 1.3888, lr_0 = 5.3486e-04
Loss = 4.2414e-01, PNorm = 55.4981, GNorm = 1.1126, lr_0 = 5.3450e-04
Loss = 4.3065e-01, PNorm = 55.5103, GNorm = 1.7884, lr_0 = 5.3413e-04
Loss = 4.0167e-01, PNorm = 55.5210, GNorm = 1.4005, lr_0 = 5.3377e-04
Loss = 3.9526e-01, PNorm = 55.5344, GNorm = 1.8465, lr_0 = 5.3340e-04
Loss = 3.8048e-01, PNorm = 55.5449, GNorm = 1.4656, lr_0 = 5.3304e-04
Loss = 4.6408e-01, PNorm = 55.5524, GNorm = 1.0433, lr_0 = 5.3267e-04
Loss = 4.0808e-01, PNorm = 55.5676, GNorm = 2.5326, lr_0 = 5.3231e-04
Loss = 5.1765e-01, PNorm = 55.5771, GNorm = 1.4113, lr_0 = 5.3194e-04
Loss = 4.3519e-01, PNorm = 55.5891, GNorm = 2.2025, lr_0 = 5.3158e-04
Loss = 4.1788e-01, PNorm = 55.5940, GNorm = 1.3103, lr_0 = 5.3121e-04
Loss = 4.4368e-01, PNorm = 55.6072, GNorm = 1.6352, lr_0 = 5.3085e-04
Loss = 4.2897e-01, PNorm = 55.6182, GNorm = 1.4362, lr_0 = 5.3048e-04
Loss = 4.0177e-01, PNorm = 55.6285, GNorm = 2.0771, lr_0 = 5.3012e-04
Loss = 4.1467e-01, PNorm = 55.6358, GNorm = 1.5539, lr_0 = 5.2976e-04
Loss = 4.1264e-01, PNorm = 55.6469, GNorm = 1.4422, lr_0 = 5.2939e-04
Loss = 4.9299e-01, PNorm = 55.6607, GNorm = 1.7185, lr_0 = 5.2903e-04
Loss = 4.1830e-01, PNorm = 55.6715, GNorm = 1.5546, lr_0 = 5.2867e-04
Loss = 3.7603e-01, PNorm = 55.6794, GNorm = 1.2445, lr_0 = 5.2831e-04
Loss = 4.5789e-01, PNorm = 55.6870, GNorm = 2.2929, lr_0 = 5.2795e-04
Loss = 4.4875e-01, PNorm = 55.6968, GNorm = 1.2715, lr_0 = 5.2758e-04
Loss = 3.8842e-01, PNorm = 55.7090, GNorm = 1.2383, lr_0 = 5.2722e-04
Loss = 4.0644e-01, PNorm = 55.7201, GNorm = 1.1027, lr_0 = 5.2686e-04
Loss = 3.8947e-01, PNorm = 55.7236, GNorm = 0.9684, lr_0 = 5.2650e-04
Loss = 4.0785e-01, PNorm = 55.7335, GNorm = 1.2518, lr_0 = 5.2614e-04
Loss = 3.8779e-01, PNorm = 55.7456, GNorm = 1.1242, lr_0 = 5.2578e-04
Loss = 4.4949e-01, PNorm = 55.7574, GNorm = 1.1849, lr_0 = 5.2542e-04
Loss = 4.2215e-01, PNorm = 55.7623, GNorm = 1.9077, lr_0 = 5.2506e-04
Loss = 4.2269e-01, PNorm = 55.7718, GNorm = 1.8880, lr_0 = 5.2470e-04
Loss = 4.5583e-01, PNorm = 55.7815, GNorm = 1.6885, lr_0 = 5.2434e-04
Loss = 4.1164e-01, PNorm = 55.7934, GNorm = 1.6752, lr_0 = 5.2398e-04
Loss = 3.9387e-01, PNorm = 55.8017, GNorm = 1.3158, lr_0 = 5.2362e-04
Loss = 3.7800e-01, PNorm = 55.8042, GNorm = 1.2180, lr_0 = 5.2326e-04
Loss = 4.3446e-01, PNorm = 55.8127, GNorm = 1.8760, lr_0 = 5.2290e-04
Loss = 4.3778e-01, PNorm = 55.8241, GNorm = 1.1346, lr_0 = 5.2255e-04
Loss = 3.9228e-01, PNorm = 55.8329, GNorm = 1.5983, lr_0 = 5.2219e-04
Loss = 4.6316e-01, PNorm = 55.8352, GNorm = 1.4544, lr_0 = 5.2183e-04
Loss = 4.4221e-01, PNorm = 55.8460, GNorm = 1.5113, lr_0 = 5.2147e-04
Loss = 4.1045e-01, PNorm = 55.8505, GNorm = 1.1126, lr_0 = 5.2112e-04
Loss = 4.2690e-01, PNorm = 55.8596, GNorm = 1.3557, lr_0 = 5.2076e-04
Loss = 3.7845e-01, PNorm = 55.8716, GNorm = 1.0668, lr_0 = 5.2040e-04
Loss = 4.1472e-01, PNorm = 55.8723, GNorm = 1.3339, lr_0 = 5.2005e-04
Loss = 3.9842e-01, PNorm = 55.8772, GNorm = 1.2096, lr_0 = 5.1969e-04
Loss = 4.3408e-01, PNorm = 55.8800, GNorm = 1.1293, lr_0 = 5.1933e-04
Loss = 4.3567e-01, PNorm = 55.8860, GNorm = 1.1642, lr_0 = 5.1898e-04
Loss = 4.0092e-01, PNorm = 55.8968, GNorm = 1.4911, lr_0 = 5.1862e-04
Loss = 4.2957e-01, PNorm = 55.9078, GNorm = 0.9953, lr_0 = 5.1827e-04
Loss = 4.1055e-01, PNorm = 55.9209, GNorm = 1.1193, lr_0 = 5.1791e-04
Validation mae = 0.115757
Epoch 10
Loss = 4.2137e-01, PNorm = 55.9305, GNorm = 1.2867, lr_0 = 5.1756e-04
Loss = 4.1967e-01, PNorm = 55.9421, GNorm = 1.5744, lr_0 = 5.1720e-04
Loss = 4.3178e-01, PNorm = 55.9503, GNorm = 1.4212, lr_0 = 5.1685e-04
Loss = 3.7874e-01, PNorm = 55.9595, GNorm = 1.1400, lr_0 = 5.1649e-04
Loss = 4.1692e-01, PNorm = 55.9646, GNorm = 1.0295, lr_0 = 5.1614e-04
Loss = 3.7749e-01, PNorm = 55.9690, GNorm = 1.4177, lr_0 = 5.1579e-04
Loss = 4.6134e-01, PNorm = 55.9736, GNorm = 2.1231, lr_0 = 5.1543e-04
Loss = 4.2952e-01, PNorm = 55.9832, GNorm = 1.6338, lr_0 = 5.1508e-04
Loss = 4.2142e-01, PNorm = 55.9897, GNorm = 1.2911, lr_0 = 5.1473e-04
Loss = 3.5955e-01, PNorm = 55.9964, GNorm = 1.3537, lr_0 = 5.1437e-04
Loss = 4.8302e-01, PNorm = 56.0019, GNorm = 1.2461, lr_0 = 5.1402e-04
Loss = 4.5208e-01, PNorm = 56.0090, GNorm = 2.3668, lr_0 = 5.1367e-04
Loss = 4.4385e-01, PNorm = 56.0212, GNorm = 1.7073, lr_0 = 5.1332e-04
Loss = 4.0446e-01, PNorm = 56.0369, GNorm = 1.4417, lr_0 = 5.1297e-04
Loss = 3.9606e-01, PNorm = 56.0494, GNorm = 1.2201, lr_0 = 5.1262e-04
Loss = 3.5550e-01, PNorm = 56.0540, GNorm = 1.1017, lr_0 = 5.1226e-04
Loss = 4.0394e-01, PNorm = 56.0643, GNorm = 1.3215, lr_0 = 5.1191e-04
Loss = 4.3887e-01, PNorm = 56.0735, GNorm = 1.4819, lr_0 = 5.1156e-04
Loss = 4.2372e-01, PNorm = 56.0818, GNorm = 0.9499, lr_0 = 5.1121e-04
Loss = 3.6579e-01, PNorm = 56.0884, GNorm = 1.0332, lr_0 = 5.1086e-04
Loss = 4.5201e-01, PNorm = 56.0908, GNorm = 1.2708, lr_0 = 5.1051e-04
Loss = 3.9636e-01, PNorm = 56.1015, GNorm = 1.4934, lr_0 = 5.1016e-04
Loss = 3.8244e-01, PNorm = 56.1111, GNorm = 2.1480, lr_0 = 5.0981e-04
Loss = 4.3122e-01, PNorm = 56.1185, GNorm = 1.6538, lr_0 = 5.0946e-04
Loss = 3.4763e-01, PNorm = 56.1237, GNorm = 2.2774, lr_0 = 5.0911e-04
Loss = 3.6623e-01, PNorm = 56.1337, GNorm = 1.5690, lr_0 = 5.0877e-04
Loss = 4.7944e-01, PNorm = 56.1412, GNorm = 1.3195, lr_0 = 5.0842e-04
Loss = 4.4737e-01, PNorm = 56.1481, GNorm = 1.8060, lr_0 = 5.0807e-04
Loss = 4.3235e-01, PNorm = 56.1515, GNorm = 1.1512, lr_0 = 5.0772e-04
Loss = 3.9142e-01, PNorm = 56.1613, GNorm = 1.0182, lr_0 = 5.0737e-04
Loss = 4.7822e-01, PNorm = 56.1707, GNorm = 1.6247, lr_0 = 5.0703e-04
Loss = 3.8256e-01, PNorm = 56.1824, GNorm = 1.0942, lr_0 = 5.0668e-04
Loss = 4.1121e-01, PNorm = 56.1906, GNorm = 1.3689, lr_0 = 5.0633e-04
Loss = 4.8166e-01, PNorm = 56.2031, GNorm = 2.0737, lr_0 = 5.0598e-04
Loss = 4.2853e-01, PNorm = 56.2156, GNorm = 1.2413, lr_0 = 5.0564e-04
Loss = 4.4668e-01, PNorm = 56.2292, GNorm = 1.5627, lr_0 = 5.0529e-04
Loss = 3.8064e-01, PNorm = 56.2452, GNorm = 0.8486, lr_0 = 5.0494e-04
Loss = 4.5015e-01, PNorm = 56.2587, GNorm = 2.3668, lr_0 = 5.0460e-04
Loss = 4.2400e-01, PNorm = 56.2659, GNorm = 1.1493, lr_0 = 5.0425e-04
Loss = 4.1496e-01, PNorm = 56.2759, GNorm = 2.0379, lr_0 = 5.0391e-04
Loss = 4.2715e-01, PNorm = 56.2859, GNorm = 1.6628, lr_0 = 5.0356e-04
Loss = 4.2679e-01, PNorm = 56.2959, GNorm = 1.3228, lr_0 = 5.0322e-04
Loss = 4.1730e-01, PNorm = 56.2979, GNorm = 1.6723, lr_0 = 5.0287e-04
Loss = 4.4106e-01, PNorm = 56.3092, GNorm = 1.3955, lr_0 = 5.0253e-04
Loss = 3.3879e-01, PNorm = 56.3160, GNorm = 1.4093, lr_0 = 5.0218e-04
Loss = 4.6462e-01, PNorm = 56.3265, GNorm = 1.0869, lr_0 = 5.0184e-04
Loss = 3.8386e-01, PNorm = 56.3419, GNorm = 1.1110, lr_0 = 5.0150e-04
Loss = 3.7256e-01, PNorm = 56.3520, GNorm = 1.4114, lr_0 = 5.0115e-04
Loss = 4.9109e-01, PNorm = 56.3613, GNorm = 1.5710, lr_0 = 5.0081e-04
Loss = 4.5072e-01, PNorm = 56.3721, GNorm = 1.1462, lr_0 = 5.0047e-04
Loss = 3.6411e-01, PNorm = 56.3833, GNorm = 2.2777, lr_0 = 5.0012e-04
Loss = 3.9827e-01, PNorm = 56.3900, GNorm = 1.4377, lr_0 = 4.9978e-04
Loss = 4.7314e-01, PNorm = 56.3967, GNorm = 1.3208, lr_0 = 4.9944e-04
Loss = 4.1650e-01, PNorm = 56.4076, GNorm = 1.3572, lr_0 = 4.9910e-04
Loss = 4.2078e-01, PNorm = 56.4158, GNorm = 1.9364, lr_0 = 4.9875e-04
Loss = 3.7327e-01, PNorm = 56.4277, GNorm = 0.8412, lr_0 = 4.9841e-04
Loss = 3.6914e-01, PNorm = 56.4375, GNorm = 2.0637, lr_0 = 4.9807e-04
Loss = 4.3960e-01, PNorm = 56.4417, GNorm = 1.7943, lr_0 = 4.9773e-04
Loss = 3.9312e-01, PNorm = 56.4460, GNorm = 1.1639, lr_0 = 4.9739e-04
Loss = 4.2016e-01, PNorm = 56.4584, GNorm = 1.5782, lr_0 = 4.9705e-04
Loss = 3.8476e-01, PNorm = 56.4698, GNorm = 1.7943, lr_0 = 4.9671e-04
Loss = 3.7591e-01, PNorm = 56.4813, GNorm = 1.8932, lr_0 = 4.9637e-04
Loss = 4.3662e-01, PNorm = 56.4941, GNorm = 1.5035, lr_0 = 4.9603e-04
Loss = 4.0102e-01, PNorm = 56.5061, GNorm = 1.7165, lr_0 = 4.9569e-04
Loss = 4.0130e-01, PNorm = 56.5136, GNorm = 1.3409, lr_0 = 4.9535e-04
Loss = 4.2231e-01, PNorm = 56.5228, GNorm = 1.0370, lr_0 = 4.9501e-04
Loss = 3.8065e-01, PNorm = 56.5316, GNorm = 1.0368, lr_0 = 4.9467e-04
Loss = 4.0763e-01, PNorm = 56.5384, GNorm = 1.2326, lr_0 = 4.9433e-04
Loss = 3.9426e-01, PNorm = 56.5409, GNorm = 0.9885, lr_0 = 4.9399e-04
Loss = 4.1431e-01, PNorm = 56.5503, GNorm = 1.1928, lr_0 = 4.9365e-04
Loss = 4.3630e-01, PNorm = 56.5577, GNorm = 1.4210, lr_0 = 4.9332e-04
Loss = 4.0099e-01, PNorm = 56.5623, GNorm = 1.3322, lr_0 = 4.9298e-04
Loss = 4.2443e-01, PNorm = 56.5723, GNorm = 1.1841, lr_0 = 4.9264e-04
Loss = 3.7983e-01, PNorm = 56.5857, GNorm = 1.3925, lr_0 = 4.9230e-04
Loss = 4.2243e-01, PNorm = 56.5890, GNorm = 1.2916, lr_0 = 4.9197e-04
Loss = 4.1916e-01, PNorm = 56.6003, GNorm = 2.1157, lr_0 = 4.9163e-04
Loss = 4.7707e-01, PNorm = 56.6120, GNorm = 1.7102, lr_0 = 4.9129e-04
Loss = 4.6421e-01, PNorm = 56.6201, GNorm = 0.9362, lr_0 = 4.9095e-04
Loss = 4.3698e-01, PNorm = 56.6288, GNorm = 1.4710, lr_0 = 4.9062e-04
Loss = 4.3316e-01, PNorm = 56.6383, GNorm = 1.7888, lr_0 = 4.9028e-04
Loss = 4.0889e-01, PNorm = 56.6528, GNorm = 1.7148, lr_0 = 4.8995e-04
Loss = 3.8083e-01, PNorm = 56.6588, GNorm = 1.1706, lr_0 = 4.8961e-04
Loss = 4.5846e-01, PNorm = 56.6654, GNorm = 1.4918, lr_0 = 4.8928e-04
Loss = 4.8992e-01, PNorm = 56.6733, GNorm = 2.8341, lr_0 = 4.8894e-04
Loss = 4.7120e-01, PNorm = 56.6843, GNorm = 1.7678, lr_0 = 4.8861e-04
Loss = 3.9298e-01, PNorm = 56.6944, GNorm = 1.0685, lr_0 = 4.8827e-04
Loss = 4.3451e-01, PNorm = 56.7024, GNorm = 1.2021, lr_0 = 4.8794e-04
Loss = 4.8917e-01, PNorm = 56.7115, GNorm = 1.5890, lr_0 = 4.8760e-04
Loss = 3.7404e-01, PNorm = 56.7206, GNorm = 1.4874, lr_0 = 4.8727e-04
Loss = 4.3572e-01, PNorm = 56.7214, GNorm = 1.7430, lr_0 = 4.8693e-04
Loss = 3.7884e-01, PNorm = 56.7329, GNorm = 1.2094, lr_0 = 4.8660e-04
Loss = 4.1546e-01, PNorm = 56.7417, GNorm = 1.2784, lr_0 = 4.8627e-04
Loss = 4.1325e-01, PNorm = 56.7514, GNorm = 1.0630, lr_0 = 4.8593e-04
Loss = 4.2324e-01, PNorm = 56.7598, GNorm = 1.7983, lr_0 = 4.8560e-04
Loss = 4.1969e-01, PNorm = 56.7694, GNorm = 1.4945, lr_0 = 4.8527e-04
Loss = 3.9812e-01, PNorm = 56.7754, GNorm = 1.4608, lr_0 = 4.8494e-04
Loss = 4.6776e-01, PNorm = 56.7876, GNorm = 1.5051, lr_0 = 4.8460e-04
Loss = 4.3688e-01, PNorm = 56.8003, GNorm = 1.5897, lr_0 = 4.8427e-04
Loss = 4.4071e-01, PNorm = 56.8120, GNorm = 2.4591, lr_0 = 4.8394e-04
Loss = 4.3978e-01, PNorm = 56.8240, GNorm = 1.5047, lr_0 = 4.8361e-04
Loss = 3.9718e-01, PNorm = 56.8325, GNorm = 1.1806, lr_0 = 4.8328e-04
Loss = 3.8233e-01, PNorm = 56.8431, GNorm = 2.0179, lr_0 = 4.8295e-04
Loss = 3.7434e-01, PNorm = 56.8519, GNorm = 1.1933, lr_0 = 4.8262e-04
Loss = 4.2527e-01, PNorm = 56.8578, GNorm = 1.2243, lr_0 = 4.8228e-04
Loss = 4.7074e-01, PNorm = 56.8689, GNorm = 1.5658, lr_0 = 4.8195e-04
Loss = 4.1391e-01, PNorm = 56.8801, GNorm = 1.6976, lr_0 = 4.8162e-04
Loss = 4.0219e-01, PNorm = 56.8878, GNorm = 1.0176, lr_0 = 4.8129e-04
Loss = 3.9646e-01, PNorm = 56.8993, GNorm = 1.6578, lr_0 = 4.8096e-04
Loss = 3.8681e-01, PNorm = 56.9102, GNorm = 1.7980, lr_0 = 4.8064e-04
Loss = 4.3545e-01, PNorm = 56.9200, GNorm = 0.9317, lr_0 = 4.8031e-04
Loss = 3.6941e-01, PNorm = 56.9296, GNorm = 1.7774, lr_0 = 4.7998e-04
Loss = 4.0995e-01, PNorm = 56.9404, GNorm = 1.4365, lr_0 = 4.7965e-04
Loss = 3.9595e-01, PNorm = 56.9464, GNorm = 1.2854, lr_0 = 4.7932e-04
Loss = 3.7569e-01, PNorm = 56.9556, GNorm = 1.3598, lr_0 = 4.7899e-04
Loss = 4.1760e-01, PNorm = 56.9651, GNorm = 1.8520, lr_0 = 4.7866e-04
Loss = 4.3033e-01, PNorm = 56.9806, GNorm = 0.9289, lr_0 = 4.7833e-04
Loss = 3.8273e-01, PNorm = 56.9895, GNorm = 1.4364, lr_0 = 4.7801e-04
Loss = 3.3520e-01, PNorm = 56.9970, GNorm = 0.9205, lr_0 = 4.7768e-04
Loss = 4.6452e-01, PNorm = 56.9992, GNorm = 1.5600, lr_0 = 4.7735e-04
Loss = 4.0960e-01, PNorm = 57.0095, GNorm = 1.1211, lr_0 = 4.7703e-04
Validation mae = 0.115143
Epoch 11
Loss = 4.1427e-01, PNorm = 57.0203, GNorm = 0.9676, lr_0 = 4.7670e-04
Loss = 4.0528e-01, PNorm = 57.0312, GNorm = 1.2148, lr_0 = 4.7637e-04
Loss = 3.9707e-01, PNorm = 57.0389, GNorm = 0.8903, lr_0 = 4.7605e-04
Loss = 4.0966e-01, PNorm = 57.0500, GNorm = 1.4554, lr_0 = 4.7572e-04
Loss = 3.8406e-01, PNorm = 57.0632, GNorm = 1.4536, lr_0 = 4.7539e-04
Loss = 3.9928e-01, PNorm = 57.0720, GNorm = 1.5768, lr_0 = 4.7507e-04
Loss = 3.8373e-01, PNorm = 57.0843, GNorm = 1.2289, lr_0 = 4.7474e-04
Loss = 4.2579e-01, PNorm = 57.0977, GNorm = 1.7486, lr_0 = 4.7442e-04
Loss = 4.5186e-01, PNorm = 57.1065, GNorm = 0.8616, lr_0 = 4.7409e-04
Loss = 4.1644e-01, PNorm = 57.1198, GNorm = 1.3682, lr_0 = 4.7377e-04
Loss = 4.0358e-01, PNorm = 57.1278, GNorm = 1.2521, lr_0 = 4.7344e-04
Loss = 3.9760e-01, PNorm = 57.1389, GNorm = 1.1911, lr_0 = 4.7312e-04
Loss = 4.3512e-01, PNorm = 57.1525, GNorm = 1.9499, lr_0 = 4.7279e-04
Loss = 3.5004e-01, PNorm = 57.1600, GNorm = 1.4563, lr_0 = 4.7247e-04
Loss = 4.0466e-01, PNorm = 57.1664, GNorm = 1.4075, lr_0 = 4.7215e-04
Loss = 4.1414e-01, PNorm = 57.1703, GNorm = 1.5007, lr_0 = 4.7182e-04
Loss = 4.5305e-01, PNorm = 57.1773, GNorm = 1.1123, lr_0 = 4.7150e-04
Loss = 4.3338e-01, PNorm = 57.1816, GNorm = 2.9219, lr_0 = 4.7118e-04
Loss = 4.3055e-01, PNorm = 57.1957, GNorm = 1.2933, lr_0 = 4.7085e-04
Loss = 3.9748e-01, PNorm = 57.2063, GNorm = 1.0013, lr_0 = 4.7053e-04
Loss = 4.5287e-01, PNorm = 57.2174, GNorm = 1.5739, lr_0 = 4.7021e-04
Loss = 4.1275e-01, PNorm = 57.2236, GNorm = 1.5420, lr_0 = 4.6989e-04
Loss = 4.1274e-01, PNorm = 57.2335, GNorm = 0.9614, lr_0 = 4.6957e-04
Loss = 3.8834e-01, PNorm = 57.2420, GNorm = 1.2589, lr_0 = 4.6924e-04
Loss = 4.1905e-01, PNorm = 57.2449, GNorm = 1.6287, lr_0 = 4.6892e-04
Loss = 4.2120e-01, PNorm = 57.2528, GNorm = 1.1017, lr_0 = 4.6860e-04
Loss = 4.1925e-01, PNorm = 57.2648, GNorm = 2.2540, lr_0 = 4.6828e-04
Loss = 3.5856e-01, PNorm = 57.2734, GNorm = 1.3226, lr_0 = 4.6796e-04
Loss = 4.1534e-01, PNorm = 57.2808, GNorm = 1.2347, lr_0 = 4.6764e-04
Loss = 4.1706e-01, PNorm = 57.2889, GNorm = 1.7237, lr_0 = 4.6732e-04
Loss = 3.6954e-01, PNorm = 57.2952, GNorm = 1.4394, lr_0 = 4.6700e-04
Loss = 3.6865e-01, PNorm = 57.3044, GNorm = 1.2745, lr_0 = 4.6668e-04
Loss = 3.9113e-01, PNorm = 57.3139, GNorm = 2.1915, lr_0 = 4.6636e-04
Loss = 4.2040e-01, PNorm = 57.3237, GNorm = 1.1640, lr_0 = 4.6604e-04
Loss = 4.3877e-01, PNorm = 57.3316, GNorm = 1.2008, lr_0 = 4.6572e-04
Loss = 4.3317e-01, PNorm = 57.3452, GNorm = 1.5483, lr_0 = 4.6540e-04
Loss = 4.3512e-01, PNorm = 57.3486, GNorm = 1.3452, lr_0 = 4.6508e-04
Loss = 4.6065e-01, PNorm = 57.3581, GNorm = 0.8954, lr_0 = 4.6476e-04
Loss = 4.0471e-01, PNorm = 57.3660, GNorm = 1.6423, lr_0 = 4.6445e-04
Loss = 4.5943e-01, PNorm = 57.3739, GNorm = 2.6215, lr_0 = 4.6413e-04
Loss = 3.6326e-01, PNorm = 57.3769, GNorm = 1.0986, lr_0 = 4.6381e-04
Loss = 4.0524e-01, PNorm = 57.3812, GNorm = 1.2896, lr_0 = 4.6349e-04
Loss = 4.1141e-01, PNorm = 57.3851, GNorm = 2.0549, lr_0 = 4.6317e-04
Loss = 3.9266e-01, PNorm = 57.3961, GNorm = 1.1385, lr_0 = 4.6286e-04
Loss = 4.1264e-01, PNorm = 57.4074, GNorm = 1.2073, lr_0 = 4.6254e-04
Loss = 4.2933e-01, PNorm = 57.4177, GNorm = 1.3117, lr_0 = 4.6222e-04
Loss = 4.2892e-01, PNorm = 57.4242, GNorm = 1.1936, lr_0 = 4.6191e-04
Loss = 4.3228e-01, PNorm = 57.4272, GNorm = 2.0018, lr_0 = 4.6159e-04
Loss = 4.2709e-01, PNorm = 57.4333, GNorm = 2.0679, lr_0 = 4.6127e-04
Loss = 3.9377e-01, PNorm = 57.4405, GNorm = 1.6091, lr_0 = 4.6096e-04
Loss = 3.4695e-01, PNorm = 57.4477, GNorm = 1.3403, lr_0 = 4.6064e-04
Loss = 3.7249e-01, PNorm = 57.4577, GNorm = 1.3163, lr_0 = 4.6033e-04
Loss = 3.4797e-01, PNorm = 57.4623, GNorm = 1.1952, lr_0 = 4.6001e-04
Loss = 4.2581e-01, PNorm = 57.4704, GNorm = 1.3325, lr_0 = 4.5970e-04
Loss = 4.5469e-01, PNorm = 57.4786, GNorm = 1.7951, lr_0 = 4.5938e-04
Loss = 3.7783e-01, PNorm = 57.4812, GNorm = 1.1966, lr_0 = 4.5907e-04
Loss = 4.2566e-01, PNorm = 57.4863, GNorm = 1.2289, lr_0 = 4.5875e-04
Loss = 4.1419e-01, PNorm = 57.4873, GNorm = 1.4156, lr_0 = 4.5844e-04
Loss = 3.7366e-01, PNorm = 57.4896, GNorm = 1.1425, lr_0 = 4.5812e-04
Loss = 4.0682e-01, PNorm = 57.4952, GNorm = 1.0985, lr_0 = 4.5781e-04
Loss = 4.6995e-01, PNorm = 57.5076, GNorm = 1.2377, lr_0 = 4.5750e-04
Loss = 3.7323e-01, PNorm = 57.5109, GNorm = 1.3378, lr_0 = 4.5718e-04
Loss = 3.8297e-01, PNorm = 57.5113, GNorm = 1.1209, lr_0 = 4.5687e-04
Loss = 4.3786e-01, PNorm = 57.5187, GNorm = 1.4261, lr_0 = 4.5656e-04
Loss = 3.8801e-01, PNorm = 57.5298, GNorm = 0.8698, lr_0 = 4.5624e-04
Loss = 4.0038e-01, PNorm = 57.5373, GNorm = 1.6559, lr_0 = 4.5593e-04
Loss = 4.4478e-01, PNorm = 57.5482, GNorm = 1.3360, lr_0 = 4.5562e-04
Loss = 4.0282e-01, PNorm = 57.5588, GNorm = 1.4816, lr_0 = 4.5531e-04
Loss = 3.4753e-01, PNorm = 57.5687, GNorm = 1.6186, lr_0 = 4.5499e-04
Loss = 4.2504e-01, PNorm = 57.5781, GNorm = 1.7539, lr_0 = 4.5468e-04
Loss = 3.6984e-01, PNorm = 57.5892, GNorm = 1.4851, lr_0 = 4.5437e-04
Loss = 4.1681e-01, PNorm = 57.5968, GNorm = 1.1866, lr_0 = 4.5406e-04
Loss = 4.3478e-01, PNorm = 57.6050, GNorm = 1.6295, lr_0 = 4.5375e-04
Loss = 5.0007e-01, PNorm = 57.6138, GNorm = 1.1215, lr_0 = 4.5344e-04
Loss = 3.3114e-01, PNorm = 57.6200, GNorm = 1.2217, lr_0 = 4.5313e-04
Loss = 4.1532e-01, PNorm = 57.6275, GNorm = 1.4235, lr_0 = 4.5282e-04
Loss = 4.0552e-01, PNorm = 57.6340, GNorm = 1.4667, lr_0 = 4.5251e-04
Loss = 3.7934e-01, PNorm = 57.6417, GNorm = 2.2756, lr_0 = 4.5220e-04
Loss = 4.5166e-01, PNorm = 57.6500, GNorm = 1.2471, lr_0 = 4.5189e-04
Loss = 4.0948e-01, PNorm = 57.6609, GNorm = 1.1204, lr_0 = 4.5158e-04
Loss = 4.1592e-01, PNorm = 57.6698, GNorm = 1.6279, lr_0 = 4.5127e-04
Loss = 4.0805e-01, PNorm = 57.6740, GNorm = 1.3808, lr_0 = 4.5096e-04
Loss = 4.1894e-01, PNorm = 57.6788, GNorm = 1.3019, lr_0 = 4.5065e-04
Loss = 3.5666e-01, PNorm = 57.6896, GNorm = 1.3419, lr_0 = 4.5034e-04
Loss = 3.7223e-01, PNorm = 57.7002, GNorm = 1.1904, lr_0 = 4.5003e-04
Loss = 3.8139e-01, PNorm = 57.7093, GNorm = 1.0847, lr_0 = 4.4972e-04
Loss = 3.6100e-01, PNorm = 57.7113, GNorm = 1.2364, lr_0 = 4.4942e-04
Loss = 3.6322e-01, PNorm = 57.7126, GNorm = 1.1952, lr_0 = 4.4911e-04
Loss = 4.0786e-01, PNorm = 57.7183, GNorm = 1.2766, lr_0 = 4.4880e-04
Loss = 4.1476e-01, PNorm = 57.7239, GNorm = 1.8743, lr_0 = 4.4849e-04
Loss = 3.9961e-01, PNorm = 57.7342, GNorm = 1.1218, lr_0 = 4.4819e-04
Loss = 3.7523e-01, PNorm = 57.7380, GNorm = 1.7298, lr_0 = 4.4788e-04
Loss = 3.5252e-01, PNorm = 57.7476, GNorm = 1.0668, lr_0 = 4.4757e-04
Loss = 3.7534e-01, PNorm = 57.7549, GNorm = 1.5235, lr_0 = 4.4727e-04
Loss = 3.9249e-01, PNorm = 57.7618, GNorm = 1.9859, lr_0 = 4.4696e-04
Loss = 3.8517e-01, PNorm = 57.7712, GNorm = 1.5527, lr_0 = 4.4665e-04
Loss = 4.4400e-01, PNorm = 57.7806, GNorm = 1.1947, lr_0 = 4.4635e-04
Loss = 4.7472e-01, PNorm = 57.7899, GNorm = 1.6671, lr_0 = 4.4604e-04
Loss = 4.2305e-01, PNorm = 57.7958, GNorm = 1.2314, lr_0 = 4.4574e-04
Loss = 4.6018e-01, PNorm = 57.8076, GNorm = 1.3546, lr_0 = 4.4543e-04
Loss = 4.1864e-01, PNorm = 57.8174, GNorm = 0.9988, lr_0 = 4.4513e-04
Loss = 3.9681e-01, PNorm = 57.8209, GNorm = 1.4584, lr_0 = 4.4482e-04
Loss = 4.0991e-01, PNorm = 57.8319, GNorm = 1.6236, lr_0 = 4.4452e-04
Loss = 3.6894e-01, PNorm = 57.8428, GNorm = 1.4986, lr_0 = 4.4421e-04
Loss = 4.0067e-01, PNorm = 57.8469, GNorm = 1.0856, lr_0 = 4.4391e-04
Loss = 3.2691e-01, PNorm = 57.8568, GNorm = 1.0345, lr_0 = 4.4360e-04
Loss = 4.2956e-01, PNorm = 57.8666, GNorm = 1.4484, lr_0 = 4.4330e-04
Loss = 4.3732e-01, PNorm = 57.8774, GNorm = 1.6703, lr_0 = 4.4299e-04
Loss = 4.1025e-01, PNorm = 57.8871, GNorm = 1.6172, lr_0 = 4.4269e-04
Loss = 3.8760e-01, PNorm = 57.8947, GNorm = 1.3843, lr_0 = 4.4239e-04
Loss = 4.4059e-01, PNorm = 57.8970, GNorm = 1.1657, lr_0 = 4.4209e-04
Loss = 3.8334e-01, PNorm = 57.9050, GNorm = 1.7763, lr_0 = 4.4178e-04
Loss = 3.5073e-01, PNorm = 57.9104, GNorm = 1.2183, lr_0 = 4.4148e-04
Loss = 3.7847e-01, PNorm = 57.9176, GNorm = 1.0792, lr_0 = 4.4118e-04
Loss = 3.9513e-01, PNorm = 57.9240, GNorm = 1.3393, lr_0 = 4.4088e-04
Loss = 4.9384e-01, PNorm = 57.9277, GNorm = 1.4137, lr_0 = 4.4057e-04
Loss = 4.1983e-01, PNorm = 57.9346, GNorm = 1.1154, lr_0 = 4.4027e-04
Loss = 4.3902e-01, PNorm = 57.9393, GNorm = 1.2140, lr_0 = 4.3997e-04
Loss = 3.7624e-01, PNorm = 57.9481, GNorm = 1.2213, lr_0 = 4.3967e-04
Loss = 3.5335e-01, PNorm = 57.9524, GNorm = 1.1470, lr_0 = 4.3937e-04
Validation mae = 0.113884
Epoch 12
Loss = 3.8983e-01, PNorm = 57.9595, GNorm = 1.1069, lr_0 = 4.3907e-04
Loss = 3.9666e-01, PNorm = 57.9659, GNorm = 1.2905, lr_0 = 4.3877e-04
Loss = 4.2196e-01, PNorm = 57.9741, GNorm = 1.1091, lr_0 = 4.3846e-04
Loss = 3.8912e-01, PNorm = 57.9821, GNorm = 1.4817, lr_0 = 4.3816e-04
Loss = 3.6416e-01, PNorm = 57.9935, GNorm = 1.6100, lr_0 = 4.3786e-04
Loss = 3.7578e-01, PNorm = 58.0008, GNorm = 1.7480, lr_0 = 4.3756e-04
Loss = 3.7587e-01, PNorm = 58.0100, GNorm = 1.3844, lr_0 = 4.3726e-04
Loss = 3.8718e-01, PNorm = 58.0146, GNorm = 1.7314, lr_0 = 4.3696e-04
Loss = 3.5319e-01, PNorm = 58.0248, GNorm = 1.8268, lr_0 = 4.3667e-04
Loss = 3.8239e-01, PNorm = 58.0301, GNorm = 1.0605, lr_0 = 4.3637e-04
Loss = 4.2995e-01, PNorm = 58.0342, GNorm = 1.2026, lr_0 = 4.3607e-04
Loss = 3.7327e-01, PNorm = 58.0455, GNorm = 1.8321, lr_0 = 4.3577e-04
Loss = 3.4887e-01, PNorm = 58.0523, GNorm = 1.4001, lr_0 = 4.3547e-04
Loss = 4.1152e-01, PNorm = 58.0582, GNorm = 1.0958, lr_0 = 4.3517e-04
Loss = 3.7829e-01, PNorm = 58.0661, GNorm = 1.3677, lr_0 = 4.3487e-04
Loss = 4.3576e-01, PNorm = 58.0724, GNorm = 1.3058, lr_0 = 4.3458e-04
Loss = 3.7167e-01, PNorm = 58.0869, GNorm = 1.3054, lr_0 = 4.3428e-04
Loss = 4.1704e-01, PNorm = 58.0932, GNorm = 1.5351, lr_0 = 4.3398e-04
Loss = 5.2919e-01, PNorm = 58.0986, GNorm = 2.0879, lr_0 = 4.3368e-04
Loss = 4.3591e-01, PNorm = 58.1068, GNorm = 1.3842, lr_0 = 4.3339e-04
Loss = 4.4352e-01, PNorm = 58.1075, GNorm = 1.5452, lr_0 = 4.3309e-04
Loss = 4.0465e-01, PNorm = 58.1146, GNorm = 1.0783, lr_0 = 4.3279e-04
Loss = 3.9483e-01, PNorm = 58.1169, GNorm = 1.6673, lr_0 = 4.3250e-04
Loss = 4.2903e-01, PNorm = 58.1217, GNorm = 1.4192, lr_0 = 4.3220e-04
Loss = 3.5697e-01, PNorm = 58.1326, GNorm = 0.9542, lr_0 = 4.3190e-04
Loss = 3.8216e-01, PNorm = 58.1392, GNorm = 1.3486, lr_0 = 4.3161e-04
Loss = 4.1453e-01, PNorm = 58.1502, GNorm = 1.1734, lr_0 = 4.3131e-04
Loss = 3.7318e-01, PNorm = 58.1572, GNorm = 1.2319, lr_0 = 4.3102e-04
Loss = 3.9064e-01, PNorm = 58.1642, GNorm = 1.1627, lr_0 = 4.3072e-04
Loss = 3.7259e-01, PNorm = 58.1700, GNorm = 1.5326, lr_0 = 4.3043e-04
Loss = 4.0509e-01, PNorm = 58.1762, GNorm = 1.6106, lr_0 = 4.3013e-04
Loss = 3.8840e-01, PNorm = 58.1880, GNorm = 1.0477, lr_0 = 4.2984e-04
Loss = 4.0107e-01, PNorm = 58.1976, GNorm = 2.2496, lr_0 = 4.2954e-04
Loss = 3.8317e-01, PNorm = 58.2044, GNorm = 1.0686, lr_0 = 4.2925e-04
Loss = 3.6226e-01, PNorm = 58.2056, GNorm = 1.8534, lr_0 = 4.2895e-04
Loss = 3.6772e-01, PNorm = 58.2108, GNorm = 1.9921, lr_0 = 4.2866e-04
Loss = 4.0409e-01, PNorm = 58.2175, GNorm = 1.2763, lr_0 = 4.2837e-04
Loss = 3.6833e-01, PNorm = 58.2297, GNorm = 1.1163, lr_0 = 4.2807e-04
Loss = 3.6078e-01, PNorm = 58.2342, GNorm = 1.2884, lr_0 = 4.2778e-04
Loss = 3.7955e-01, PNorm = 58.2419, GNorm = 1.9105, lr_0 = 4.2749e-04
Loss = 4.0115e-01, PNorm = 58.2469, GNorm = 1.1208, lr_0 = 4.2719e-04
Loss = 4.4027e-01, PNorm = 58.2547, GNorm = 1.6515, lr_0 = 4.2690e-04
Loss = 3.7366e-01, PNorm = 58.2582, GNorm = 1.4348, lr_0 = 4.2661e-04
Loss = 4.5831e-01, PNorm = 58.2670, GNorm = 1.6982, lr_0 = 4.2632e-04
Loss = 3.5773e-01, PNorm = 58.2723, GNorm = 1.5247, lr_0 = 4.2602e-04
Loss = 4.3456e-01, PNorm = 58.2783, GNorm = 1.4808, lr_0 = 4.2573e-04
Loss = 4.3241e-01, PNorm = 58.2829, GNorm = 1.2240, lr_0 = 4.2544e-04
Loss = 3.6481e-01, PNorm = 58.2911, GNorm = 1.4134, lr_0 = 4.2515e-04
Loss = 3.8641e-01, PNorm = 58.2983, GNorm = 1.7563, lr_0 = 4.2486e-04
Loss = 4.4248e-01, PNorm = 58.3091, GNorm = 1.3484, lr_0 = 4.2457e-04
Loss = 4.3382e-01, PNorm = 58.3156, GNorm = 1.3366, lr_0 = 4.2428e-04
Loss = 4.1427e-01, PNorm = 58.3187, GNorm = 2.1691, lr_0 = 4.2399e-04
Loss = 4.7298e-01, PNorm = 58.3283, GNorm = 1.7501, lr_0 = 4.2370e-04
Loss = 4.2099e-01, PNorm = 58.3402, GNorm = 1.1495, lr_0 = 4.2340e-04
Loss = 4.2588e-01, PNorm = 58.3469, GNorm = 2.2297, lr_0 = 4.2311e-04
Loss = 4.1674e-01, PNorm = 58.3496, GNorm = 1.6297, lr_0 = 4.2283e-04
Loss = 4.5792e-01, PNorm = 58.3528, GNorm = 1.7104, lr_0 = 4.2254e-04
Loss = 3.5602e-01, PNorm = 58.3577, GNorm = 0.8912, lr_0 = 4.2225e-04
Loss = 3.5748e-01, PNorm = 58.3647, GNorm = 1.1833, lr_0 = 4.2196e-04
Loss = 3.8405e-01, PNorm = 58.3695, GNorm = 1.2785, lr_0 = 4.2167e-04
Loss = 3.6913e-01, PNorm = 58.3779, GNorm = 1.4792, lr_0 = 4.2138e-04
Loss = 3.9231e-01, PNorm = 58.3821, GNorm = 1.2573, lr_0 = 4.2109e-04
Loss = 4.0778e-01, PNorm = 58.3863, GNorm = 2.3747, lr_0 = 4.2080e-04
Loss = 4.1130e-01, PNorm = 58.3895, GNorm = 1.2928, lr_0 = 4.2051e-04
Loss = 4.1809e-01, PNorm = 58.3922, GNorm = 1.2297, lr_0 = 4.2023e-04
Loss = 4.0439e-01, PNorm = 58.3973, GNorm = 1.2277, lr_0 = 4.1994e-04
Loss = 3.4835e-01, PNorm = 58.3994, GNorm = 1.2936, lr_0 = 4.1965e-04
Loss = 3.8221e-01, PNorm = 58.4086, GNorm = 1.1557, lr_0 = 4.1936e-04
Loss = 3.8723e-01, PNorm = 58.4160, GNorm = 1.5042, lr_0 = 4.1907e-04
Loss = 3.7273e-01, PNorm = 58.4244, GNorm = 1.5968, lr_0 = 4.1879e-04
Loss = 3.8775e-01, PNorm = 58.4342, GNorm = 1.0702, lr_0 = 4.1850e-04
Loss = 3.6229e-01, PNorm = 58.4410, GNorm = 1.1834, lr_0 = 4.1821e-04
Loss = 4.1263e-01, PNorm = 58.4497, GNorm = 1.2364, lr_0 = 4.1793e-04
Loss = 4.2562e-01, PNorm = 58.4567, GNorm = 1.5546, lr_0 = 4.1764e-04
Loss = 3.7456e-01, PNorm = 58.4664, GNorm = 1.9728, lr_0 = 4.1736e-04
Loss = 3.9137e-01, PNorm = 58.4717, GNorm = 1.4105, lr_0 = 4.1707e-04
Loss = 3.4362e-01, PNorm = 58.4799, GNorm = 1.9367, lr_0 = 4.1678e-04
Loss = 4.1669e-01, PNorm = 58.4914, GNorm = 1.2895, lr_0 = 4.1650e-04
Loss = 3.7460e-01, PNorm = 58.4995, GNorm = 1.1362, lr_0 = 4.1621e-04
Loss = 5.1186e-01, PNorm = 58.5110, GNorm = 1.4104, lr_0 = 4.1593e-04
Loss = 3.9772e-01, PNorm = 58.5241, GNorm = 1.4791, lr_0 = 4.1564e-04
Loss = 4.3180e-01, PNorm = 58.5292, GNorm = 1.3948, lr_0 = 4.1536e-04
Loss = 3.7463e-01, PNorm = 58.5319, GNorm = 1.9597, lr_0 = 4.1507e-04
Loss = 3.9609e-01, PNorm = 58.5354, GNorm = 1.0304, lr_0 = 4.1479e-04
Loss = 3.5924e-01, PNorm = 58.5366, GNorm = 1.1442, lr_0 = 4.1450e-04
Loss = 3.9498e-01, PNorm = 58.5414, GNorm = 1.0176, lr_0 = 4.1422e-04
Loss = 3.9377e-01, PNorm = 58.5477, GNorm = 1.6358, lr_0 = 4.1394e-04
Loss = 4.4925e-01, PNorm = 58.5562, GNorm = 2.2939, lr_0 = 4.1365e-04
Loss = 3.4203e-01, PNorm = 58.5585, GNorm = 1.7892, lr_0 = 4.1337e-04
Loss = 3.9585e-01, PNorm = 58.5596, GNorm = 1.1481, lr_0 = 4.1309e-04
Loss = 4.1289e-01, PNorm = 58.5638, GNorm = 1.2449, lr_0 = 4.1280e-04
Loss = 3.7195e-01, PNorm = 58.5719, GNorm = 1.2861, lr_0 = 4.1252e-04
Loss = 4.2182e-01, PNorm = 58.5782, GNorm = 0.9845, lr_0 = 4.1224e-04
Loss = 3.9008e-01, PNorm = 58.5842, GNorm = 1.2787, lr_0 = 4.1196e-04
Loss = 4.1870e-01, PNorm = 58.5911, GNorm = 1.7729, lr_0 = 4.1167e-04
Loss = 4.1658e-01, PNorm = 58.5976, GNorm = 1.4475, lr_0 = 4.1139e-04
Loss = 3.4315e-01, PNorm = 58.6026, GNorm = 1.2793, lr_0 = 4.1111e-04
Loss = 4.0740e-01, PNorm = 58.6103, GNorm = 1.9684, lr_0 = 4.1083e-04
Loss = 4.0660e-01, PNorm = 58.6151, GNorm = 1.6524, lr_0 = 4.1055e-04
Loss = 3.8762e-01, PNorm = 58.6222, GNorm = 1.5357, lr_0 = 4.1027e-04
Loss = 3.8738e-01, PNorm = 58.6292, GNorm = 1.3228, lr_0 = 4.0998e-04
Loss = 4.7934e-01, PNorm = 58.6365, GNorm = 2.3804, lr_0 = 4.0970e-04
Loss = 3.9945e-01, PNorm = 58.6404, GNorm = 1.4305, lr_0 = 4.0942e-04
Loss = 3.3214e-01, PNorm = 58.6426, GNorm = 1.1327, lr_0 = 4.0914e-04
Loss = 4.0024e-01, PNorm = 58.6518, GNorm = 1.1122, lr_0 = 4.0886e-04
Loss = 3.7658e-01, PNorm = 58.6623, GNorm = 1.6421, lr_0 = 4.0858e-04
Loss = 4.0754e-01, PNorm = 58.6607, GNorm = 1.1805, lr_0 = 4.0830e-04
Loss = 4.1270e-01, PNorm = 58.6634, GNorm = 1.5566, lr_0 = 4.0802e-04
Loss = 4.3140e-01, PNorm = 58.6650, GNorm = 1.2191, lr_0 = 4.0774e-04
Loss = 3.6128e-01, PNorm = 58.6677, GNorm = 1.6107, lr_0 = 4.0746e-04
Loss = 3.8573e-01, PNorm = 58.6749, GNorm = 1.3802, lr_0 = 4.0718e-04
Loss = 4.1244e-01, PNorm = 58.6835, GNorm = 1.8490, lr_0 = 4.0691e-04
Loss = 3.6217e-01, PNorm = 58.6916, GNorm = 1.4379, lr_0 = 4.0663e-04
Loss = 4.4281e-01, PNorm = 58.7024, GNorm = 1.1095, lr_0 = 4.0635e-04
Loss = 4.6005e-01, PNorm = 58.7137, GNorm = 0.9150, lr_0 = 4.0607e-04
Loss = 4.1290e-01, PNorm = 58.7210, GNorm = 0.9951, lr_0 = 4.0579e-04
Loss = 3.9467e-01, PNorm = 58.7308, GNorm = 1.5117, lr_0 = 4.0551e-04
Loss = 4.2822e-01, PNorm = 58.7374, GNorm = 2.5251, lr_0 = 4.0524e-04
Loss = 4.4490e-01, PNorm = 58.7467, GNorm = 1.2027, lr_0 = 4.0496e-04
Loss = 4.4644e-01, PNorm = 58.7473, GNorm = 1.2734, lr_0 = 4.0468e-04
Validation mae = 0.115989
Epoch 13
Loss = 4.0342e-01, PNorm = 58.7557, GNorm = 1.4174, lr_0 = 4.0440e-04
Loss = 3.8880e-01, PNorm = 58.7644, GNorm = 1.4157, lr_0 = 4.0413e-04
Loss = 4.3318e-01, PNorm = 58.7694, GNorm = 1.3151, lr_0 = 4.0385e-04
Loss = 3.9357e-01, PNorm = 58.7764, GNorm = 1.2559, lr_0 = 4.0357e-04
Loss = 4.1297e-01, PNorm = 58.7857, GNorm = 1.4403, lr_0 = 4.0330e-04
Loss = 3.7601e-01, PNorm = 58.7959, GNorm = 1.0341, lr_0 = 4.0302e-04
Loss = 3.8272e-01, PNorm = 58.8035, GNorm = 1.1662, lr_0 = 4.0274e-04
Loss = 4.3529e-01, PNorm = 58.8087, GNorm = 1.2582, lr_0 = 4.0247e-04
Loss = 4.2649e-01, PNorm = 58.8161, GNorm = 1.4814, lr_0 = 4.0219e-04
Loss = 3.9304e-01, PNorm = 58.8202, GNorm = 1.6140, lr_0 = 4.0192e-04
Loss = 3.9480e-01, PNorm = 58.8250, GNorm = 1.3286, lr_0 = 4.0164e-04
Loss = 3.5992e-01, PNorm = 58.8253, GNorm = 1.2305, lr_0 = 4.0137e-04
Loss = 3.7157e-01, PNorm = 58.8284, GNorm = 1.2094, lr_0 = 4.0109e-04
Loss = 3.6457e-01, PNorm = 58.8354, GNorm = 1.3461, lr_0 = 4.0082e-04
Loss = 4.0374e-01, PNorm = 58.8395, GNorm = 1.4384, lr_0 = 4.0054e-04
Loss = 3.9951e-01, PNorm = 58.8468, GNorm = 1.7292, lr_0 = 4.0027e-04
Loss = 4.0261e-01, PNorm = 58.8558, GNorm = 1.3907, lr_0 = 3.9999e-04
Loss = 3.6512e-01, PNorm = 58.8622, GNorm = 1.0701, lr_0 = 3.9972e-04
Loss = 3.6323e-01, PNorm = 58.8685, GNorm = 1.1266, lr_0 = 3.9945e-04
Loss = 3.4375e-01, PNorm = 58.8737, GNorm = 1.1437, lr_0 = 3.9917e-04
Loss = 3.8363e-01, PNorm = 58.8815, GNorm = 0.9873, lr_0 = 3.9890e-04
Loss = 3.9167e-01, PNorm = 58.8911, GNorm = 1.5071, lr_0 = 3.9863e-04
Loss = 3.8648e-01, PNorm = 58.8957, GNorm = 1.0716, lr_0 = 3.9835e-04
Loss = 4.0570e-01, PNorm = 58.9024, GNorm = 1.0575, lr_0 = 3.9808e-04
Loss = 3.6249e-01, PNorm = 58.9088, GNorm = 1.0110, lr_0 = 3.9781e-04
Loss = 3.8620e-01, PNorm = 58.9155, GNorm = 1.7383, lr_0 = 3.9753e-04
Loss = 3.2862e-01, PNorm = 58.9212, GNorm = 2.2571, lr_0 = 3.9726e-04
Loss = 4.1704e-01, PNorm = 58.9268, GNorm = 1.4113, lr_0 = 3.9699e-04
Loss = 4.2332e-01, PNorm = 58.9333, GNorm = 1.3297, lr_0 = 3.9672e-04
Loss = 4.0424e-01, PNorm = 58.9473, GNorm = 1.3996, lr_0 = 3.9645e-04
Loss = 3.9727e-01, PNorm = 58.9559, GNorm = 1.5786, lr_0 = 3.9617e-04
Loss = 3.9250e-01, PNorm = 58.9608, GNorm = 1.8422, lr_0 = 3.9590e-04
Loss = 4.1579e-01, PNorm = 58.9604, GNorm = 1.5093, lr_0 = 3.9563e-04
Loss = 4.1108e-01, PNorm = 58.9721, GNorm = 1.8693, lr_0 = 3.9536e-04
Loss = 3.6746e-01, PNorm = 58.9799, GNorm = 1.4922, lr_0 = 3.9509e-04
Loss = 3.8579e-01, PNorm = 58.9900, GNorm = 0.9548, lr_0 = 3.9482e-04
Loss = 4.3870e-01, PNorm = 58.9918, GNorm = 1.2921, lr_0 = 3.9455e-04
Loss = 4.5247e-01, PNorm = 59.0006, GNorm = 2.1269, lr_0 = 3.9428e-04
Loss = 3.8654e-01, PNorm = 59.0045, GNorm = 1.1276, lr_0 = 3.9401e-04
Loss = 3.3406e-01, PNorm = 59.0106, GNorm = 1.4144, lr_0 = 3.9374e-04
Loss = 4.6953e-01, PNorm = 59.0158, GNorm = 1.5042, lr_0 = 3.9347e-04
Loss = 3.9901e-01, PNorm = 59.0222, GNorm = 1.6512, lr_0 = 3.9320e-04
Loss = 4.2365e-01, PNorm = 59.0270, GNorm = 1.4039, lr_0 = 3.9293e-04
Loss = 3.9726e-01, PNorm = 59.0340, GNorm = 1.1512, lr_0 = 3.9266e-04
Loss = 4.0008e-01, PNorm = 59.0398, GNorm = 1.7893, lr_0 = 3.9239e-04
Loss = 3.8169e-01, PNorm = 59.0451, GNorm = 1.4001, lr_0 = 3.9212e-04
Loss = 4.1525e-01, PNorm = 59.0501, GNorm = 1.7049, lr_0 = 3.9185e-04
Loss = 3.8925e-01, PNorm = 59.0540, GNorm = 1.0535, lr_0 = 3.9159e-04
Loss = 3.8382e-01, PNorm = 59.0617, GNorm = 1.0963, lr_0 = 3.9132e-04
Loss = 3.6947e-01, PNorm = 59.0724, GNorm = 1.1148, lr_0 = 3.9105e-04
Loss = 4.4603e-01, PNorm = 59.0785, GNorm = 1.6088, lr_0 = 3.9078e-04
Loss = 3.5112e-01, PNorm = 59.0847, GNorm = 1.1102, lr_0 = 3.9051e-04
Loss = 3.6271e-01, PNorm = 59.0868, GNorm = 1.4030, lr_0 = 3.9025e-04
Loss = 3.5079e-01, PNorm = 59.0922, GNorm = 1.6273, lr_0 = 3.8998e-04
Loss = 4.2740e-01, PNorm = 59.0996, GNorm = 1.9822, lr_0 = 3.8971e-04
Loss = 3.4795e-01, PNorm = 59.1081, GNorm = 1.1731, lr_0 = 3.8945e-04
Loss = 3.8782e-01, PNorm = 59.1135, GNorm = 1.0830, lr_0 = 3.8918e-04
Loss = 3.8205e-01, PNorm = 59.1141, GNorm = 1.1588, lr_0 = 3.8891e-04
Loss = 3.8892e-01, PNorm = 59.1174, GNorm = 1.2737, lr_0 = 3.8865e-04
Loss = 4.0899e-01, PNorm = 59.1182, GNorm = 1.5463, lr_0 = 3.8838e-04
Loss = 3.5748e-01, PNorm = 59.1250, GNorm = 1.5140, lr_0 = 3.8811e-04
Loss = 3.9959e-01, PNorm = 59.1285, GNorm = 1.2780, lr_0 = 3.8785e-04
Loss = 3.6065e-01, PNorm = 59.1362, GNorm = 1.2196, lr_0 = 3.8758e-04
Loss = 3.8110e-01, PNorm = 59.1392, GNorm = 1.1032, lr_0 = 3.8732e-04
Loss = 3.7754e-01, PNorm = 59.1500, GNorm = 1.3797, lr_0 = 3.8705e-04
Loss = 3.6849e-01, PNorm = 59.1539, GNorm = 1.3232, lr_0 = 3.8679e-04
Loss = 4.1387e-01, PNorm = 59.1546, GNorm = 1.4214, lr_0 = 3.8652e-04
Loss = 4.1147e-01, PNorm = 59.1615, GNorm = 1.6322, lr_0 = 3.8626e-04
Loss = 4.2671e-01, PNorm = 59.1729, GNorm = 1.6144, lr_0 = 3.8599e-04
Loss = 4.1201e-01, PNorm = 59.1736, GNorm = 1.3928, lr_0 = 3.8573e-04
Loss = 3.8807e-01, PNorm = 59.1731, GNorm = 1.1610, lr_0 = 3.8546e-04
Loss = 3.9523e-01, PNorm = 59.1841, GNorm = 0.8669, lr_0 = 3.8520e-04
Loss = 4.0258e-01, PNorm = 59.1910, GNorm = 2.0308, lr_0 = 3.8493e-04
Loss = 3.8175e-01, PNorm = 59.1942, GNorm = 1.4886, lr_0 = 3.8467e-04
Loss = 4.3106e-01, PNorm = 59.2030, GNorm = 2.4113, lr_0 = 3.8441e-04
Loss = 4.2452e-01, PNorm = 59.2076, GNorm = 1.2629, lr_0 = 3.8414e-04
Loss = 5.0297e-01, PNorm = 59.2106, GNorm = 2.1048, lr_0 = 3.8388e-04
Loss = 4.6692e-01, PNorm = 59.2198, GNorm = 1.7763, lr_0 = 3.8362e-04
Loss = 4.0596e-01, PNorm = 59.2243, GNorm = 1.0726, lr_0 = 3.8336e-04
Loss = 4.0575e-01, PNorm = 59.2314, GNorm = 1.6935, lr_0 = 3.8309e-04
Loss = 3.9977e-01, PNorm = 59.2368, GNorm = 1.2326, lr_0 = 3.8283e-04
Loss = 3.6576e-01, PNorm = 59.2454, GNorm = 1.1425, lr_0 = 3.8257e-04
Loss = 4.2367e-01, PNorm = 59.2485, GNorm = 1.3931, lr_0 = 3.8231e-04
Loss = 3.5797e-01, PNorm = 59.2505, GNorm = 1.3467, lr_0 = 3.8204e-04
Loss = 3.3555e-01, PNorm = 59.2537, GNorm = 1.2842, lr_0 = 3.8178e-04
Loss = 3.8032e-01, PNorm = 59.2590, GNorm = 1.5513, lr_0 = 3.8152e-04
Loss = 4.2111e-01, PNorm = 59.2594, GNorm = 2.4140, lr_0 = 3.8126e-04
Loss = 4.0637e-01, PNorm = 59.2636, GNorm = 1.8546, lr_0 = 3.8100e-04
Loss = 3.8817e-01, PNorm = 59.2655, GNorm = 1.5019, lr_0 = 3.8074e-04
Loss = 3.7649e-01, PNorm = 59.2713, GNorm = 1.1540, lr_0 = 3.8048e-04
Loss = 4.1198e-01, PNorm = 59.2762, GNorm = 1.3253, lr_0 = 3.8022e-04
Loss = 3.5145e-01, PNorm = 59.2821, GNorm = 1.1213, lr_0 = 3.7995e-04
Loss = 3.5559e-01, PNorm = 59.2871, GNorm = 1.7630, lr_0 = 3.7969e-04
Loss = 4.3025e-01, PNorm = 59.2914, GNorm = 1.9575, lr_0 = 3.7943e-04
Loss = 3.8604e-01, PNorm = 59.2988, GNorm = 2.1915, lr_0 = 3.7917e-04
Loss = 4.2502e-01, PNorm = 59.3027, GNorm = 1.3196, lr_0 = 3.7891e-04
Loss = 4.2770e-01, PNorm = 59.3087, GNorm = 1.3889, lr_0 = 3.7866e-04
Loss = 3.9895e-01, PNorm = 59.3103, GNorm = 1.4437, lr_0 = 3.7840e-04
Loss = 4.5922e-01, PNorm = 59.3198, GNorm = 1.3843, lr_0 = 3.7814e-04
Loss = 3.9280e-01, PNorm = 59.3244, GNorm = 1.5682, lr_0 = 3.7788e-04
Loss = 4.4418e-01, PNorm = 59.3296, GNorm = 1.1520, lr_0 = 3.7762e-04
Loss = 3.8344e-01, PNorm = 59.3382, GNorm = 1.5052, lr_0 = 3.7736e-04
Loss = 3.8595e-01, PNorm = 59.3462, GNorm = 1.4307, lr_0 = 3.7710e-04
Loss = 4.0662e-01, PNorm = 59.3444, GNorm = 1.2277, lr_0 = 3.7684e-04
Loss = 3.9670e-01, PNorm = 59.3479, GNorm = 1.3743, lr_0 = 3.7659e-04
Loss = 3.9376e-01, PNorm = 59.3552, GNorm = 1.7231, lr_0 = 3.7633e-04
Loss = 3.7272e-01, PNorm = 59.3623, GNorm = 1.2131, lr_0 = 3.7607e-04
Loss = 4.1298e-01, PNorm = 59.3657, GNorm = 1.2356, lr_0 = 3.7581e-04
Loss = 3.6796e-01, PNorm = 59.3735, GNorm = 0.9166, lr_0 = 3.7555e-04
Loss = 4.0888e-01, PNorm = 59.3780, GNorm = 0.8583, lr_0 = 3.7530e-04
Loss = 3.8797e-01, PNorm = 59.3861, GNorm = 0.9318, lr_0 = 3.7504e-04
Loss = 4.0984e-01, PNorm = 59.3926, GNorm = 2.0214, lr_0 = 3.7478e-04
Loss = 4.2041e-01, PNorm = 59.3999, GNorm = 1.3242, lr_0 = 3.7453e-04
Loss = 3.8012e-01, PNorm = 59.4081, GNorm = 1.0939, lr_0 = 3.7427e-04
Loss = 4.4118e-01, PNorm = 59.4186, GNorm = 1.3164, lr_0 = 3.7401e-04
Loss = 4.1156e-01, PNorm = 59.4291, GNorm = 1.5798, lr_0 = 3.7376e-04
Loss = 3.6606e-01, PNorm = 59.4305, GNorm = 1.4700, lr_0 = 3.7350e-04
Loss = 5.1562e-01, PNorm = 59.4277, GNorm = 1.5403, lr_0 = 3.7325e-04
Loss = 3.3155e-01, PNorm = 59.4354, GNorm = 1.5358, lr_0 = 3.7299e-04
Loss = 3.4570e-01, PNorm = 59.4405, GNorm = 1.3178, lr_0 = 3.7273e-04
Validation mae = 0.113679
Epoch 14
Loss = 3.4112e-01, PNorm = 59.4463, GNorm = 1.3017, lr_0 = 3.7248e-04
Loss = 3.8081e-01, PNorm = 59.4510, GNorm = 1.5599, lr_0 = 3.7222e-04
Loss = 4.2616e-01, PNorm = 59.4551, GNorm = 1.4483, lr_0 = 3.7197e-04
Loss = 3.8739e-01, PNorm = 59.4581, GNorm = 1.4943, lr_0 = 3.7171e-04
Loss = 3.7910e-01, PNorm = 59.4643, GNorm = 1.3580, lr_0 = 3.7146e-04
Loss = 4.3106e-01, PNorm = 59.4715, GNorm = 1.9321, lr_0 = 3.7120e-04
Loss = 3.4785e-01, PNorm = 59.4756, GNorm = 1.9386, lr_0 = 3.7095e-04
Loss = 3.9218e-01, PNorm = 59.4768, GNorm = 1.1578, lr_0 = 3.7070e-04
Loss = 3.4695e-01, PNorm = 59.4794, GNorm = 0.9158, lr_0 = 3.7044e-04
Loss = 3.5679e-01, PNorm = 59.4846, GNorm = 1.3060, lr_0 = 3.7019e-04
Loss = 3.9681e-01, PNorm = 59.4915, GNorm = 1.5369, lr_0 = 3.6993e-04
Loss = 3.7658e-01, PNorm = 59.5035, GNorm = 1.2283, lr_0 = 3.6968e-04
Loss = 4.0506e-01, PNorm = 59.5088, GNorm = 1.0113, lr_0 = 3.6943e-04
Loss = 3.7771e-01, PNorm = 59.5146, GNorm = 1.3806, lr_0 = 3.6917e-04
Loss = 3.7313e-01, PNorm = 59.5184, GNorm = 1.0778, lr_0 = 3.6892e-04
Loss = 3.7406e-01, PNorm = 59.5228, GNorm = 1.3488, lr_0 = 3.6867e-04
Loss = 4.2462e-01, PNorm = 59.5277, GNorm = 1.4407, lr_0 = 3.6842e-04
Loss = 3.1288e-01, PNorm = 59.5319, GNorm = 1.3097, lr_0 = 3.6816e-04
Loss = 3.9822e-01, PNorm = 59.5313, GNorm = 1.3943, lr_0 = 3.6791e-04
Loss = 3.9104e-01, PNorm = 59.5391, GNorm = 1.3723, lr_0 = 3.6766e-04
Loss = 3.4825e-01, PNorm = 59.5428, GNorm = 1.6780, lr_0 = 3.6741e-04
Loss = 3.7867e-01, PNorm = 59.5506, GNorm = 0.9918, lr_0 = 3.6716e-04
Loss = 4.2638e-01, PNorm = 59.5570, GNorm = 2.0479, lr_0 = 3.6690e-04
Loss = 4.1831e-01, PNorm = 59.5625, GNorm = 1.4821, lr_0 = 3.6665e-04
Loss = 3.8989e-01, PNorm = 59.5668, GNorm = 1.8110, lr_0 = 3.6640e-04
Loss = 3.7896e-01, PNorm = 59.5746, GNorm = 1.3564, lr_0 = 3.6615e-04
Loss = 3.9934e-01, PNorm = 59.5795, GNorm = 1.2995, lr_0 = 3.6590e-04
Loss = 3.8744e-01, PNorm = 59.5835, GNorm = 1.1798, lr_0 = 3.6565e-04
Loss = 4.7004e-01, PNorm = 59.5868, GNorm = 1.0997, lr_0 = 3.6540e-04
Loss = 3.7310e-01, PNorm = 59.5969, GNorm = 2.0440, lr_0 = 3.6515e-04
Loss = 3.9968e-01, PNorm = 59.6036, GNorm = 1.7442, lr_0 = 3.6490e-04
Loss = 3.7763e-01, PNorm = 59.6106, GNorm = 1.8022, lr_0 = 3.6465e-04
Loss = 4.0791e-01, PNorm = 59.6143, GNorm = 1.0943, lr_0 = 3.6440e-04
Loss = 4.0574e-01, PNorm = 59.6193, GNorm = 1.3119, lr_0 = 3.6415e-04
Loss = 3.8381e-01, PNorm = 59.6302, GNorm = 1.0990, lr_0 = 3.6390e-04
Loss = 4.0574e-01, PNorm = 59.6342, GNorm = 1.8540, lr_0 = 3.6365e-04
Loss = 3.8040e-01, PNorm = 59.6373, GNorm = 1.3254, lr_0 = 3.6340e-04
Loss = 3.8117e-01, PNorm = 59.6393, GNorm = 1.4779, lr_0 = 3.6315e-04
Loss = 3.4278e-01, PNorm = 59.6471, GNorm = 1.4312, lr_0 = 3.6290e-04
Loss = 4.3900e-01, PNorm = 59.6476, GNorm = 1.1800, lr_0 = 3.6266e-04
Loss = 4.1383e-01, PNorm = 59.6479, GNorm = 1.2396, lr_0 = 3.6241e-04
Loss = 3.6807e-01, PNorm = 59.6511, GNorm = 1.3845, lr_0 = 3.6216e-04
Loss = 3.9083e-01, PNorm = 59.6560, GNorm = 1.4028, lr_0 = 3.6191e-04
Loss = 3.7260e-01, PNorm = 59.6603, GNorm = 1.2561, lr_0 = 3.6166e-04
Loss = 4.4245e-01, PNorm = 59.6656, GNorm = 1.2535, lr_0 = 3.6141e-04
Loss = 3.7639e-01, PNorm = 59.6707, GNorm = 1.9959, lr_0 = 3.6117e-04
Loss = 3.6638e-01, PNorm = 59.6724, GNorm = 1.4457, lr_0 = 3.6092e-04
Loss = 4.0806e-01, PNorm = 59.6775, GNorm = 1.4250, lr_0 = 3.6067e-04
Loss = 4.1537e-01, PNorm = 59.6803, GNorm = 1.0646, lr_0 = 3.6043e-04
Loss = 4.2726e-01, PNorm = 59.6917, GNorm = 1.2836, lr_0 = 3.6018e-04
Loss = 3.8159e-01, PNorm = 59.6986, GNorm = 1.5478, lr_0 = 3.5993e-04
Loss = 3.8025e-01, PNorm = 59.7061, GNorm = 1.0903, lr_0 = 3.5969e-04
Loss = 3.9030e-01, PNorm = 59.7092, GNorm = 1.1690, lr_0 = 3.5944e-04
Loss = 4.0922e-01, PNorm = 59.7158, GNorm = 0.9928, lr_0 = 3.5919e-04
Loss = 3.5382e-01, PNorm = 59.7241, GNorm = 1.6358, lr_0 = 3.5895e-04
Loss = 4.1541e-01, PNorm = 59.7287, GNorm = 1.8702, lr_0 = 3.5870e-04
Loss = 4.9871e-01, PNorm = 59.7357, GNorm = 1.1621, lr_0 = 3.5845e-04
Loss = 3.9956e-01, PNorm = 59.7377, GNorm = 1.4913, lr_0 = 3.5821e-04
Loss = 3.4782e-01, PNorm = 59.7441, GNorm = 1.4589, lr_0 = 3.5796e-04
Loss = 3.8481e-01, PNorm = 59.7522, GNorm = 1.6164, lr_0 = 3.5772e-04
Loss = 4.0834e-01, PNorm = 59.7542, GNorm = 2.1903, lr_0 = 3.5747e-04
Loss = 4.4389e-01, PNorm = 59.7619, GNorm = 1.1510, lr_0 = 3.5723e-04
Loss = 3.5453e-01, PNorm = 59.7662, GNorm = 1.3421, lr_0 = 3.5698e-04
Loss = 3.8317e-01, PNorm = 59.7697, GNorm = 1.5174, lr_0 = 3.5674e-04
Loss = 3.6427e-01, PNorm = 59.7796, GNorm = 1.3150, lr_0 = 3.5650e-04
Loss = 4.8926e-01, PNorm = 59.7826, GNorm = 1.1118, lr_0 = 3.5625e-04
Loss = 3.9327e-01, PNorm = 59.7885, GNorm = 1.4551, lr_0 = 3.5601e-04
Loss = 4.1001e-01, PNorm = 59.7916, GNorm = 1.9471, lr_0 = 3.5576e-04
Loss = 4.6235e-01, PNorm = 59.7968, GNorm = 1.4732, lr_0 = 3.5552e-04
Loss = 3.8351e-01, PNorm = 59.8082, GNorm = 1.0346, lr_0 = 3.5528e-04
Loss = 4.2127e-01, PNorm = 59.8154, GNorm = 2.3278, lr_0 = 3.5503e-04
Loss = 4.0693e-01, PNorm = 59.8197, GNorm = 1.6254, lr_0 = 3.5479e-04
Loss = 3.9045e-01, PNorm = 59.8292, GNorm = 1.2991, lr_0 = 3.5455e-04
Loss = 4.2326e-01, PNorm = 59.8307, GNorm = 1.4612, lr_0 = 3.5430e-04
Loss = 4.3038e-01, PNorm = 59.8356, GNorm = 1.7032, lr_0 = 3.5406e-04
Loss = 3.4801e-01, PNorm = 59.8438, GNorm = 1.1012, lr_0 = 3.5382e-04
Loss = 3.8336e-01, PNorm = 59.8503, GNorm = 1.3358, lr_0 = 3.5358e-04
Loss = 3.8483e-01, PNorm = 59.8544, GNorm = 1.2155, lr_0 = 3.5333e-04
Loss = 3.4409e-01, PNorm = 59.8582, GNorm = 1.0548, lr_0 = 3.5309e-04
Loss = 3.5362e-01, PNorm = 59.8642, GNorm = 1.5309, lr_0 = 3.5285e-04
Loss = 3.9622e-01, PNorm = 59.8681, GNorm = 1.4196, lr_0 = 3.5261e-04
Loss = 3.7326e-01, PNorm = 59.8742, GNorm = 1.6107, lr_0 = 3.5237e-04
Loss = 4.2701e-01, PNorm = 59.8791, GNorm = 1.5503, lr_0 = 3.5212e-04
Loss = 3.9865e-01, PNorm = 59.8838, GNorm = 1.5172, lr_0 = 3.5188e-04
Loss = 3.5760e-01, PNorm = 59.8832, GNorm = 1.4207, lr_0 = 3.5164e-04
Loss = 3.6661e-01, PNorm = 59.8873, GNorm = 1.0233, lr_0 = 3.5140e-04
Loss = 3.9502e-01, PNorm = 59.8897, GNorm = 1.3164, lr_0 = 3.5116e-04
Loss = 3.8509e-01, PNorm = 59.8939, GNorm = 1.2991, lr_0 = 3.5092e-04
Loss = 3.6004e-01, PNorm = 59.8977, GNorm = 0.8829, lr_0 = 3.5068e-04
Loss = 4.2586e-01, PNorm = 59.9040, GNorm = 2.9766, lr_0 = 3.5044e-04
Loss = 3.6534e-01, PNorm = 59.9073, GNorm = 1.3439, lr_0 = 3.5020e-04
Loss = 3.9112e-01, PNorm = 59.9099, GNorm = 1.2575, lr_0 = 3.4996e-04
Loss = 4.0536e-01, PNorm = 59.9141, GNorm = 1.4734, lr_0 = 3.4972e-04
Loss = 3.8852e-01, PNorm = 59.9187, GNorm = 1.7782, lr_0 = 3.4948e-04
Loss = 3.8478e-01, PNorm = 59.9243, GNorm = 1.4647, lr_0 = 3.4924e-04
Loss = 3.9391e-01, PNorm = 59.9302, GNorm = 1.5074, lr_0 = 3.4900e-04
Loss = 4.0705e-01, PNorm = 59.9356, GNorm = 1.6405, lr_0 = 3.4876e-04
Loss = 4.2345e-01, PNorm = 59.9422, GNorm = 1.6325, lr_0 = 3.4852e-04
Loss = 3.7813e-01, PNorm = 59.9485, GNorm = 1.6203, lr_0 = 3.4828e-04
Loss = 3.6423e-01, PNorm = 59.9524, GNorm = 1.0424, lr_0 = 3.4805e-04
Loss = 3.8124e-01, PNorm = 59.9597, GNorm = 1.6605, lr_0 = 3.4781e-04
Loss = 4.1292e-01, PNorm = 59.9661, GNorm = 2.7467, lr_0 = 3.4757e-04
Loss = 4.0139e-01, PNorm = 59.9703, GNorm = 2.7953, lr_0 = 3.4733e-04
Loss = 3.9030e-01, PNorm = 59.9731, GNorm = 1.0917, lr_0 = 3.4709e-04
Loss = 4.5617e-01, PNorm = 59.9755, GNorm = 1.5084, lr_0 = 3.4686e-04
Loss = 3.7321e-01, PNorm = 59.9856, GNorm = 1.1824, lr_0 = 3.4662e-04
Loss = 3.3167e-01, PNorm = 59.9903, GNorm = 1.1436, lr_0 = 3.4638e-04
Loss = 3.6685e-01, PNorm = 59.9961, GNorm = 1.1194, lr_0 = 3.4614e-04
Loss = 3.4203e-01, PNorm = 60.0019, GNorm = 1.1142, lr_0 = 3.4591e-04
Loss = 3.8960e-01, PNorm = 60.0082, GNorm = 1.6720, lr_0 = 3.4567e-04
Loss = 4.0935e-01, PNorm = 60.0133, GNorm = 1.2252, lr_0 = 3.4543e-04
Loss = 4.4071e-01, PNorm = 60.0154, GNorm = 2.0479, lr_0 = 3.4520e-04
Loss = 3.3134e-01, PNorm = 60.0200, GNorm = 1.1415, lr_0 = 3.4496e-04
Loss = 4.0174e-01, PNorm = 60.0208, GNorm = 1.4020, lr_0 = 3.4472e-04
Loss = 3.6568e-01, PNorm = 60.0280, GNorm = 1.3563, lr_0 = 3.4449e-04
Loss = 4.0080e-01, PNorm = 60.0346, GNorm = 1.5465, lr_0 = 3.4425e-04
Loss = 3.8025e-01, PNorm = 60.0439, GNorm = 0.8788, lr_0 = 3.4402e-04
Loss = 3.9213e-01, PNorm = 60.0500, GNorm = 1.4132, lr_0 = 3.4378e-04
Loss = 4.4200e-01, PNorm = 60.0525, GNorm = 1.2688, lr_0 = 3.4354e-04
Loss = 3.9307e-01, PNorm = 60.0609, GNorm = 1.2610, lr_0 = 3.4331e-04
Validation mae = 0.114377
Epoch 15
Loss = 3.8791e-01, PNorm = 60.0622, GNorm = 1.3734, lr_0 = 3.4307e-04
Loss = 4.5887e-01, PNorm = 60.0646, GNorm = 1.8518, lr_0 = 3.4284e-04
Loss = 3.5871e-01, PNorm = 60.0702, GNorm = 1.0607, lr_0 = 3.4260e-04
Loss = 3.9299e-01, PNorm = 60.0769, GNorm = 1.6151, lr_0 = 3.4237e-04
Loss = 3.9775e-01, PNorm = 60.0838, GNorm = 1.4411, lr_0 = 3.4213e-04
Loss = 4.3061e-01, PNorm = 60.0866, GNorm = 1.5342, lr_0 = 3.4190e-04
Loss = 3.9300e-01, PNorm = 60.0923, GNorm = 1.2037, lr_0 = 3.4167e-04
Loss = 3.3187e-01, PNorm = 60.0933, GNorm = 1.5437, lr_0 = 3.4143e-04
Loss = 4.1086e-01, PNorm = 60.0967, GNorm = 1.1663, lr_0 = 3.4120e-04
Loss = 3.7938e-01, PNorm = 60.1017, GNorm = 0.9556, lr_0 = 3.4096e-04
Loss = 4.3543e-01, PNorm = 60.1112, GNorm = 1.8398, lr_0 = 3.4073e-04
Loss = 3.9340e-01, PNorm = 60.1185, GNorm = 1.8513, lr_0 = 3.4050e-04
Loss = 3.9287e-01, PNorm = 60.1254, GNorm = 1.3233, lr_0 = 3.4026e-04
Loss = 3.8622e-01, PNorm = 60.1324, GNorm = 1.5708, lr_0 = 3.4003e-04
Loss = 3.9099e-01, PNorm = 60.1379, GNorm = 1.2197, lr_0 = 3.3980e-04
Loss = 3.4788e-01, PNorm = 60.1381, GNorm = 1.1149, lr_0 = 3.3956e-04
Loss = 3.6140e-01, PNorm = 60.1448, GNorm = 1.0912, lr_0 = 3.3933e-04
Loss = 3.7500e-01, PNorm = 60.1507, GNorm = 1.5255, lr_0 = 3.3910e-04
Loss = 3.7361e-01, PNorm = 60.1550, GNorm = 1.1407, lr_0 = 3.3887e-04
Loss = 3.9164e-01, PNorm = 60.1594, GNorm = 1.1075, lr_0 = 3.3864e-04
Loss = 3.2818e-01, PNorm = 60.1650, GNorm = 1.6694, lr_0 = 3.3840e-04
Loss = 3.2406e-01, PNorm = 60.1715, GNorm = 1.3738, lr_0 = 3.3817e-04
Loss = 4.0940e-01, PNorm = 60.1772, GNorm = 1.4776, lr_0 = 3.3794e-04
Loss = 3.9449e-01, PNorm = 60.1812, GNorm = 1.2146, lr_0 = 3.3771e-04
Loss = 3.6747e-01, PNorm = 60.1860, GNorm = 1.5044, lr_0 = 3.3748e-04
Loss = 3.9681e-01, PNorm = 60.1909, GNorm = 1.7185, lr_0 = 3.3725e-04
Loss = 3.3234e-01, PNorm = 60.1928, GNorm = 0.7572, lr_0 = 3.3701e-04
Loss = 3.6594e-01, PNorm = 60.1982, GNorm = 1.0273, lr_0 = 3.3678e-04
Loss = 3.6386e-01, PNorm = 60.2010, GNorm = 1.2627, lr_0 = 3.3655e-04
Loss = 4.2369e-01, PNorm = 60.2042, GNorm = 2.0662, lr_0 = 3.3632e-04
Loss = 3.5976e-01, PNorm = 60.2099, GNorm = 0.8021, lr_0 = 3.3609e-04
Loss = 3.9267e-01, PNorm = 60.2120, GNorm = 1.3308, lr_0 = 3.3586e-04
Loss = 4.2289e-01, PNorm = 60.2205, GNorm = 1.9887, lr_0 = 3.3563e-04
Loss = 3.9188e-01, PNorm = 60.2250, GNorm = 1.6993, lr_0 = 3.3540e-04
Loss = 3.9631e-01, PNorm = 60.2274, GNorm = 2.8464, lr_0 = 3.3517e-04
Loss = 3.8295e-01, PNorm = 60.2255, GNorm = 1.2674, lr_0 = 3.3494e-04
Loss = 4.0372e-01, PNorm = 60.2321, GNorm = 0.8053, lr_0 = 3.3471e-04
Loss = 4.2723e-01, PNorm = 60.2397, GNorm = 1.4957, lr_0 = 3.3448e-04
Loss = 2.8163e-01, PNorm = 60.2540, GNorm = 1.3052, lr_0 = 3.3425e-04
Loss = 4.6193e-01, PNorm = 60.2602, GNorm = 1.7150, lr_0 = 3.3403e-04
Loss = 4.1723e-01, PNorm = 60.2670, GNorm = 2.9479, lr_0 = 3.3380e-04
Loss = 3.8887e-01, PNorm = 60.2789, GNorm = 1.9678, lr_0 = 3.3357e-04
Loss = 3.9035e-01, PNorm = 60.2808, GNorm = 1.6770, lr_0 = 3.3334e-04
Loss = 4.2183e-01, PNorm = 60.2866, GNorm = 2.4205, lr_0 = 3.3311e-04
Loss = 3.5184e-01, PNorm = 60.2893, GNorm = 1.5434, lr_0 = 3.3288e-04
Loss = 3.6670e-01, PNorm = 60.2965, GNorm = 1.5202, lr_0 = 3.3265e-04
Loss = 4.2372e-01, PNorm = 60.2978, GNorm = 1.5033, lr_0 = 3.3243e-04
Loss = 3.9707e-01, PNorm = 60.2989, GNorm = 1.2891, lr_0 = 3.3220e-04
Loss = 4.2624e-01, PNorm = 60.3033, GNorm = 1.3631, lr_0 = 3.3197e-04
Loss = 3.3298e-01, PNorm = 60.3086, GNorm = 1.5142, lr_0 = 3.3174e-04
Loss = 3.7393e-01, PNorm = 60.3134, GNorm = 1.2552, lr_0 = 3.3152e-04
Loss = 3.9903e-01, PNorm = 60.3168, GNorm = 1.7452, lr_0 = 3.3129e-04
Loss = 3.8849e-01, PNorm = 60.3214, GNorm = 1.7550, lr_0 = 3.3106e-04
Loss = 3.6805e-01, PNorm = 60.3266, GNorm = 1.5773, lr_0 = 3.3084e-04
Loss = 3.4788e-01, PNorm = 60.3317, GNorm = 1.6418, lr_0 = 3.3061e-04
Loss = 3.7359e-01, PNorm = 60.3336, GNorm = 1.4019, lr_0 = 3.3038e-04
Loss = 3.4534e-01, PNorm = 60.3336, GNorm = 1.5082, lr_0 = 3.3016e-04
Loss = 3.7611e-01, PNorm = 60.3396, GNorm = 1.1298, lr_0 = 3.2993e-04
Loss = 3.6397e-01, PNorm = 60.3439, GNorm = 1.3249, lr_0 = 3.2970e-04
Loss = 3.6327e-01, PNorm = 60.3507, GNorm = 1.7180, lr_0 = 3.2948e-04
Loss = 3.6905e-01, PNorm = 60.3516, GNorm = 1.2130, lr_0 = 3.2925e-04
Loss = 4.1696e-01, PNorm = 60.3550, GNorm = 0.9883, lr_0 = 3.2903e-04
Loss = 3.7168e-01, PNorm = 60.3575, GNorm = 1.1080, lr_0 = 3.2880e-04
Loss = 4.0885e-01, PNorm = 60.3605, GNorm = 1.1028, lr_0 = 3.2858e-04
Loss = 3.7583e-01, PNorm = 60.3625, GNorm = 1.2681, lr_0 = 3.2835e-04
Loss = 3.7301e-01, PNorm = 60.3713, GNorm = 1.4770, lr_0 = 3.2813e-04
Loss = 3.6613e-01, PNorm = 60.3754, GNorm = 1.2697, lr_0 = 3.2790e-04
Loss = 3.7853e-01, PNorm = 60.3810, GNorm = 1.5830, lr_0 = 3.2768e-04
Loss = 3.3551e-01, PNorm = 60.3851, GNorm = 1.1831, lr_0 = 3.2745e-04
Loss = 3.8034e-01, PNorm = 60.3886, GNorm = 1.2128, lr_0 = 3.2723e-04
Loss = 3.8587e-01, PNorm = 60.3922, GNorm = 1.7579, lr_0 = 3.2700e-04
Loss = 3.3180e-01, PNorm = 60.3978, GNorm = 1.3936, lr_0 = 3.2678e-04
Loss = 3.3329e-01, PNorm = 60.4014, GNorm = 1.7930, lr_0 = 3.2656e-04
Loss = 3.5813e-01, PNorm = 60.4022, GNorm = 1.8345, lr_0 = 3.2633e-04
Loss = 3.5372e-01, PNorm = 60.4060, GNorm = 1.7306, lr_0 = 3.2611e-04
Loss = 3.5713e-01, PNorm = 60.4122, GNorm = 1.2657, lr_0 = 3.2589e-04
Loss = 3.9878e-01, PNorm = 60.4161, GNorm = 1.4435, lr_0 = 3.2566e-04
Loss = 3.3636e-01, PNorm = 60.4255, GNorm = 2.0442, lr_0 = 3.2544e-04
Loss = 3.6200e-01, PNorm = 60.4302, GNorm = 1.3965, lr_0 = 3.2522e-04
Loss = 3.2048e-01, PNorm = 60.4341, GNorm = 1.1575, lr_0 = 3.2499e-04
Loss = 3.9887e-01, PNorm = 60.4337, GNorm = 1.5165, lr_0 = 3.2477e-04
Loss = 3.9322e-01, PNorm = 60.4405, GNorm = 1.3720, lr_0 = 3.2455e-04
Loss = 3.8935e-01, PNorm = 60.4460, GNorm = 1.7258, lr_0 = 3.2433e-04
Loss = 3.3778e-01, PNorm = 60.4495, GNorm = 1.7282, lr_0 = 3.2410e-04
Loss = 3.8590e-01, PNorm = 60.4551, GNorm = 1.6496, lr_0 = 3.2388e-04
Loss = 4.2094e-01, PNorm = 60.4605, GNorm = 1.3570, lr_0 = 3.2366e-04
Loss = 4.2415e-01, PNorm = 60.4585, GNorm = 1.2338, lr_0 = 3.2344e-04
Loss = 3.7969e-01, PNorm = 60.4608, GNorm = 2.4383, lr_0 = 3.2322e-04
Loss = 3.6871e-01, PNorm = 60.4636, GNorm = 1.1087, lr_0 = 3.2300e-04
Loss = 3.9586e-01, PNorm = 60.4705, GNorm = 1.4363, lr_0 = 3.2277e-04
Loss = 4.2311e-01, PNorm = 60.4707, GNorm = 1.5057, lr_0 = 3.2255e-04
Loss = 4.0439e-01, PNorm = 60.4743, GNorm = 1.6847, lr_0 = 3.2233e-04
Loss = 3.3397e-01, PNorm = 60.4748, GNorm = 1.3425, lr_0 = 3.2211e-04
Loss = 4.1065e-01, PNorm = 60.4783, GNorm = 1.6311, lr_0 = 3.2189e-04
Loss = 3.6599e-01, PNorm = 60.4846, GNorm = 1.5477, lr_0 = 3.2167e-04
Loss = 3.7809e-01, PNorm = 60.4826, GNorm = 1.4764, lr_0 = 3.2145e-04
Loss = 3.8485e-01, PNorm = 60.4872, GNorm = 2.1685, lr_0 = 3.2123e-04
Loss = 3.7823e-01, PNorm = 60.4926, GNorm = 1.2296, lr_0 = 3.2101e-04
Loss = 3.8026e-01, PNorm = 60.4989, GNorm = 1.0861, lr_0 = 3.2079e-04
Loss = 4.2365e-01, PNorm = 60.5028, GNorm = 1.1852, lr_0 = 3.2057e-04
Loss = 3.6861e-01, PNorm = 60.5075, GNorm = 1.0522, lr_0 = 3.2035e-04
Loss = 3.7389e-01, PNorm = 60.5123, GNorm = 1.3212, lr_0 = 3.2013e-04
Loss = 4.5285e-01, PNorm = 60.5168, GNorm = 1.5003, lr_0 = 3.1991e-04
Loss = 4.2353e-01, PNorm = 60.5220, GNorm = 1.5181, lr_0 = 3.1969e-04
Loss = 4.4539e-01, PNorm = 60.5275, GNorm = 1.1910, lr_0 = 3.1947e-04
Loss = 3.6564e-01, PNorm = 60.5301, GNorm = 1.7041, lr_0 = 3.1925e-04
Loss = 4.5439e-01, PNorm = 60.5335, GNorm = 1.7849, lr_0 = 3.1904e-04
Loss = 3.5963e-01, PNorm = 60.5411, GNorm = 1.3877, lr_0 = 3.1882e-04
Loss = 4.2105e-01, PNorm = 60.5408, GNorm = 1.7146, lr_0 = 3.1860e-04
Loss = 3.9367e-01, PNorm = 60.5479, GNorm = 1.7582, lr_0 = 3.1838e-04
Loss = 3.8712e-01, PNorm = 60.5557, GNorm = 1.3494, lr_0 = 3.1816e-04
Loss = 3.7685e-01, PNorm = 60.5605, GNorm = 1.2102, lr_0 = 3.1794e-04
Loss = 3.3252e-01, PNorm = 60.5635, GNorm = 1.1267, lr_0 = 3.1773e-04
Loss = 3.3858e-01, PNorm = 60.5676, GNorm = 1.3049, lr_0 = 3.1751e-04
Loss = 3.6482e-01, PNorm = 60.5701, GNorm = 1.3680, lr_0 = 3.1729e-04
Loss = 3.4125e-01, PNorm = 60.5713, GNorm = 1.1509, lr_0 = 3.1707e-04
Loss = 4.1812e-01, PNorm = 60.5762, GNorm = 1.3465, lr_0 = 3.1686e-04
Loss = 4.1510e-01, PNorm = 60.5776, GNorm = 1.1713, lr_0 = 3.1664e-04
Loss = 4.1525e-01, PNorm = 60.5865, GNorm = 1.1487, lr_0 = 3.1642e-04
Loss = 3.5996e-01, PNorm = 60.5935, GNorm = 1.2127, lr_0 = 3.1621e-04
Validation mae = 0.113516
Epoch 16
Loss = 3.6440e-01, PNorm = 60.5925, GNorm = 1.7991, lr_0 = 3.1599e-04
Loss = 3.4171e-01, PNorm = 60.5957, GNorm = 1.2225, lr_0 = 3.1577e-04
Loss = 4.0536e-01, PNorm = 60.5978, GNorm = 1.9239, lr_0 = 3.1556e-04
Loss = 3.7766e-01, PNorm = 60.6050, GNorm = 1.2461, lr_0 = 3.1534e-04
Loss = 3.7854e-01, PNorm = 60.6104, GNorm = 1.4513, lr_0 = 3.1512e-04
Loss = 4.0188e-01, PNorm = 60.6137, GNorm = 1.4062, lr_0 = 3.1491e-04
Loss = 4.0899e-01, PNorm = 60.6176, GNorm = 1.3369, lr_0 = 3.1469e-04
Loss = 3.4689e-01, PNorm = 60.6231, GNorm = 0.9601, lr_0 = 3.1448e-04
Loss = 3.7858e-01, PNorm = 60.6301, GNorm = 1.5156, lr_0 = 3.1426e-04
Loss = 3.5428e-01, PNorm = 60.6357, GNorm = 1.4031, lr_0 = 3.1405e-04
Loss = 3.6731e-01, PNorm = 60.6417, GNorm = 1.0320, lr_0 = 3.1383e-04
Loss = 3.9748e-01, PNorm = 60.6416, GNorm = 1.1115, lr_0 = 3.1362e-04
Loss = 4.1129e-01, PNorm = 60.6427, GNorm = 1.5963, lr_0 = 3.1340e-04
Loss = 4.0082e-01, PNorm = 60.6472, GNorm = 1.7498, lr_0 = 3.1319e-04
Loss = 3.1286e-01, PNorm = 60.6514, GNorm = 0.8328, lr_0 = 3.1297e-04
Loss = 4.0387e-01, PNorm = 60.6582, GNorm = 1.2887, lr_0 = 3.1276e-04
Loss = 4.1084e-01, PNorm = 60.6619, GNorm = 1.8427, lr_0 = 3.1254e-04
Loss = 3.6002e-01, PNorm = 60.6656, GNorm = 1.5272, lr_0 = 3.1233e-04
Loss = 3.8961e-01, PNorm = 60.6694, GNorm = 1.9581, lr_0 = 3.1212e-04
Loss = 3.9813e-01, PNorm = 60.6727, GNorm = 1.4857, lr_0 = 3.1190e-04
Loss = 3.9788e-01, PNorm = 60.6732, GNorm = 1.6449, lr_0 = 3.1169e-04
Loss = 3.8414e-01, PNorm = 60.6781, GNorm = 1.3508, lr_0 = 3.1147e-04
Loss = 3.6356e-01, PNorm = 60.6846, GNorm = 0.9807, lr_0 = 3.1126e-04
Loss = 4.3057e-01, PNorm = 60.6918, GNorm = 1.6449, lr_0 = 3.1105e-04
Loss = 3.5236e-01, PNorm = 60.6980, GNorm = 1.1310, lr_0 = 3.1083e-04
Loss = 3.3662e-01, PNorm = 60.7021, GNorm = 1.2803, lr_0 = 3.1062e-04
Loss = 3.5889e-01, PNorm = 60.7017, GNorm = 1.6478, lr_0 = 3.1041e-04
Loss = 4.4504e-01, PNorm = 60.7047, GNorm = 1.7571, lr_0 = 3.1020e-04
Loss = 3.6950e-01, PNorm = 60.7120, GNorm = 1.8480, lr_0 = 3.0998e-04
Loss = 3.4393e-01, PNorm = 60.7189, GNorm = 1.1104, lr_0 = 3.0977e-04
Loss = 4.1205e-01, PNorm = 60.7259, GNorm = 1.4866, lr_0 = 3.0956e-04
Loss = 3.5146e-01, PNorm = 60.7304, GNorm = 2.1319, lr_0 = 3.0935e-04
Loss = 4.1066e-01, PNorm = 60.7328, GNorm = 1.9371, lr_0 = 3.0914e-04
Loss = 4.2267e-01, PNorm = 60.7365, GNorm = 1.3827, lr_0 = 3.0892e-04
Loss = 4.1294e-01, PNorm = 60.7426, GNorm = 2.1536, lr_0 = 3.0871e-04
Loss = 3.4633e-01, PNorm = 60.7472, GNorm = 1.3380, lr_0 = 3.0850e-04
Loss = 3.7240e-01, PNorm = 60.7474, GNorm = 1.6690, lr_0 = 3.0829e-04
Loss = 3.5193e-01, PNorm = 60.7500, GNorm = 2.3062, lr_0 = 3.0808e-04
Loss = 3.5968e-01, PNorm = 60.7537, GNorm = 1.3543, lr_0 = 3.0787e-04
Loss = 3.8357e-01, PNorm = 60.7566, GNorm = 1.3876, lr_0 = 3.0766e-04
Loss = 4.0302e-01, PNorm = 60.7629, GNorm = 1.8297, lr_0 = 3.0745e-04
Loss = 3.3664e-01, PNorm = 60.7645, GNorm = 1.1509, lr_0 = 3.0723e-04
Loss = 3.8043e-01, PNorm = 60.7679, GNorm = 2.7468, lr_0 = 3.0702e-04
Loss = 3.6001e-01, PNorm = 60.7729, GNorm = 1.1107, lr_0 = 3.0681e-04
Loss = 4.5024e-01, PNorm = 60.7736, GNorm = 1.8856, lr_0 = 3.0660e-04
Loss = 3.9114e-01, PNorm = 60.7835, GNorm = 1.3024, lr_0 = 3.0639e-04
Loss = 3.7787e-01, PNorm = 60.7926, GNorm = 1.3007, lr_0 = 3.0618e-04
Loss = 3.3620e-01, PNorm = 60.7946, GNorm = 1.2939, lr_0 = 3.0597e-04
Loss = 3.7325e-01, PNorm = 60.7992, GNorm = 1.7786, lr_0 = 3.0576e-04
Loss = 3.4521e-01, PNorm = 60.8038, GNorm = 1.2649, lr_0 = 3.0555e-04
Loss = 4.1440e-01, PNorm = 60.8072, GNorm = 1.4669, lr_0 = 3.0535e-04
Loss = 3.9534e-01, PNorm = 60.8114, GNorm = 1.3070, lr_0 = 3.0514e-04
Loss = 3.7474e-01, PNorm = 60.8193, GNorm = 1.7211, lr_0 = 3.0493e-04
Loss = 4.1338e-01, PNorm = 60.8231, GNorm = 1.7631, lr_0 = 3.0472e-04
Loss = 4.0690e-01, PNorm = 60.8300, GNorm = 1.0776, lr_0 = 3.0451e-04
Loss = 3.6539e-01, PNorm = 60.8341, GNorm = 1.5460, lr_0 = 3.0430e-04
Loss = 3.6740e-01, PNorm = 60.8382, GNorm = 1.3599, lr_0 = 3.0409e-04
Loss = 3.6210e-01, PNorm = 60.8411, GNorm = 1.3449, lr_0 = 3.0388e-04
Loss = 3.2488e-01, PNorm = 60.8416, GNorm = 2.0126, lr_0 = 3.0368e-04
Loss = 3.9652e-01, PNorm = 60.8471, GNorm = 1.8157, lr_0 = 3.0347e-04
Loss = 3.5889e-01, PNorm = 60.8580, GNorm = 1.3268, lr_0 = 3.0326e-04
Loss = 3.5935e-01, PNorm = 60.8621, GNorm = 1.2915, lr_0 = 3.0305e-04
Loss = 3.9720e-01, PNorm = 60.8679, GNorm = 1.1645, lr_0 = 3.0284e-04
Loss = 3.9529e-01, PNorm = 60.8694, GNorm = 1.0469, lr_0 = 3.0264e-04
Loss = 3.6399e-01, PNorm = 60.8726, GNorm = 1.4170, lr_0 = 3.0243e-04
Loss = 4.7888e-01, PNorm = 60.8773, GNorm = 1.7011, lr_0 = 3.0222e-04
Loss = 3.3464e-01, PNorm = 60.8792, GNorm = 1.3146, lr_0 = 3.0202e-04
Loss = 3.6253e-01, PNorm = 60.8847, GNorm = 1.2426, lr_0 = 3.0181e-04
Loss = 3.0887e-01, PNorm = 60.8901, GNorm = 1.0574, lr_0 = 3.0160e-04
Loss = 3.3562e-01, PNorm = 60.8960, GNorm = 0.9949, lr_0 = 3.0140e-04
Loss = 3.7742e-01, PNorm = 60.8993, GNorm = 1.3460, lr_0 = 3.0119e-04
Loss = 4.0741e-01, PNorm = 60.8979, GNorm = 2.4162, lr_0 = 3.0098e-04
Loss = 3.6618e-01, PNorm = 60.9027, GNorm = 1.4036, lr_0 = 3.0078e-04
Loss = 4.2601e-01, PNorm = 60.9073, GNorm = 1.5974, lr_0 = 3.0057e-04
Loss = 4.0264e-01, PNorm = 60.9124, GNorm = 1.6348, lr_0 = 3.0036e-04
Loss = 3.9679e-01, PNorm = 60.9171, GNorm = 1.5120, lr_0 = 3.0016e-04
Loss = 3.2720e-01, PNorm = 60.9228, GNorm = 1.8936, lr_0 = 2.9995e-04
Loss = 3.5697e-01, PNorm = 60.9259, GNorm = 1.4053, lr_0 = 2.9975e-04
Loss = 3.6387e-01, PNorm = 60.9340, GNorm = 1.3150, lr_0 = 2.9954e-04
Loss = 3.5034e-01, PNorm = 60.9380, GNorm = 1.3150, lr_0 = 2.9934e-04
Loss = 3.7020e-01, PNorm = 60.9359, GNorm = 1.1182, lr_0 = 2.9913e-04
Loss = 4.2854e-01, PNorm = 60.9343, GNorm = 1.2856, lr_0 = 2.9893e-04
Loss = 3.6710e-01, PNorm = 60.9390, GNorm = 1.4922, lr_0 = 2.9872e-04
Loss = 3.8178e-01, PNorm = 60.9459, GNorm = 1.1907, lr_0 = 2.9852e-04
Loss = 3.8106e-01, PNorm = 60.9496, GNorm = 0.9914, lr_0 = 2.9831e-04
Loss = 3.4952e-01, PNorm = 60.9546, GNorm = 1.1130, lr_0 = 2.9811e-04
Loss = 3.8661e-01, PNorm = 60.9584, GNorm = 1.6247, lr_0 = 2.9790e-04
Loss = 3.9558e-01, PNorm = 60.9615, GNorm = 1.8295, lr_0 = 2.9770e-04
Loss = 3.8195e-01, PNorm = 60.9664, GNorm = 1.7966, lr_0 = 2.9750e-04
Loss = 3.7584e-01, PNorm = 60.9693, GNorm = 1.1894, lr_0 = 2.9729e-04
Loss = 3.8437e-01, PNorm = 60.9671, GNorm = 2.1869, lr_0 = 2.9709e-04
Loss = 4.0142e-01, PNorm = 60.9724, GNorm = 1.2503, lr_0 = 2.9689e-04
Loss = 3.8713e-01, PNorm = 60.9795, GNorm = 1.0767, lr_0 = 2.9668e-04
Loss = 3.7287e-01, PNorm = 60.9801, GNorm = 1.2508, lr_0 = 2.9648e-04
Loss = 4.1126e-01, PNorm = 60.9814, GNorm = 1.6409, lr_0 = 2.9628e-04
Loss = 3.5408e-01, PNorm = 60.9832, GNorm = 1.3942, lr_0 = 2.9607e-04
Loss = 4.1790e-01, PNorm = 60.9833, GNorm = 1.8620, lr_0 = 2.9587e-04
Loss = 3.9533e-01, PNorm = 60.9904, GNorm = 1.8139, lr_0 = 2.9567e-04
Loss = 3.6560e-01, PNorm = 60.9956, GNorm = 3.6446, lr_0 = 2.9546e-04
Loss = 4.1321e-01, PNorm = 60.9949, GNorm = 1.4454, lr_0 = 2.9526e-04
Loss = 4.1050e-01, PNorm = 60.9949, GNorm = 1.6542, lr_0 = 2.9506e-04
Loss = 3.5008e-01, PNorm = 60.9953, GNorm = 1.4275, lr_0 = 2.9486e-04
Loss = 3.9268e-01, PNorm = 60.9952, GNorm = 1.5521, lr_0 = 2.9466e-04
Loss = 3.6501e-01, PNorm = 61.0039, GNorm = 1.1251, lr_0 = 2.9445e-04
Loss = 3.7400e-01, PNorm = 61.0113, GNorm = 1.4065, lr_0 = 2.9425e-04
Loss = 4.2380e-01, PNorm = 61.0116, GNorm = 1.9747, lr_0 = 2.9405e-04
Loss = 3.7783e-01, PNorm = 61.0156, GNorm = 1.6258, lr_0 = 2.9385e-04
Loss = 3.9461e-01, PNorm = 61.0200, GNorm = 0.9124, lr_0 = 2.9365e-04
Loss = 4.0696e-01, PNorm = 61.0249, GNorm = 1.3521, lr_0 = 2.9345e-04
Loss = 3.9339e-01, PNorm = 61.0285, GNorm = 1.4205, lr_0 = 2.9325e-04
Loss = 4.0015e-01, PNorm = 61.0262, GNorm = 1.4154, lr_0 = 2.9305e-04
Loss = 3.6801e-01, PNorm = 61.0308, GNorm = 1.1765, lr_0 = 2.9284e-04
Loss = 3.1695e-01, PNorm = 61.0335, GNorm = 1.1715, lr_0 = 2.9264e-04
Loss = 3.8132e-01, PNorm = 61.0352, GNorm = 1.9551, lr_0 = 2.9244e-04
Loss = 3.9993e-01, PNorm = 61.0365, GNorm = 1.7362, lr_0 = 2.9224e-04
Loss = 3.5156e-01, PNorm = 61.0382, GNorm = 1.5063, lr_0 = 2.9204e-04
Loss = 3.6132e-01, PNorm = 61.0413, GNorm = 1.4225, lr_0 = 2.9184e-04
Loss = 3.3562e-01, PNorm = 61.0436, GNorm = 0.9096, lr_0 = 2.9164e-04
Loss = 3.7781e-01, PNorm = 61.0454, GNorm = 1.1124, lr_0 = 2.9144e-04
Loss = 3.9050e-01, PNorm = 61.0524, GNorm = 1.5505, lr_0 = 2.9124e-04
Validation mae = 0.112659
Epoch 17
Loss = 3.9008e-01, PNorm = 61.0547, GNorm = 1.1936, lr_0 = 2.9104e-04
Loss = 3.2784e-01, PNorm = 61.0551, GNorm = 0.9147, lr_0 = 2.9084e-04
Loss = 3.5101e-01, PNorm = 61.0557, GNorm = 1.2279, lr_0 = 2.9065e-04
Loss = 3.8640e-01, PNorm = 61.0616, GNorm = 2.0040, lr_0 = 2.9045e-04
Loss = 3.6935e-01, PNorm = 61.0689, GNorm = 1.2490, lr_0 = 2.9025e-04
Loss = 3.6304e-01, PNorm = 61.0725, GNorm = 1.3810, lr_0 = 2.9005e-04
Loss = 3.5308e-01, PNorm = 61.0768, GNorm = 1.3646, lr_0 = 2.8985e-04
Loss = 3.6045e-01, PNorm = 61.0814, GNorm = 0.7777, lr_0 = 2.8965e-04
Loss = 4.1005e-01, PNorm = 61.0858, GNorm = 1.2855, lr_0 = 2.8945e-04
Loss = 3.7141e-01, PNorm = 61.0931, GNorm = 2.3253, lr_0 = 2.8925e-04
Loss = 3.8865e-01, PNorm = 61.0950, GNorm = 1.3431, lr_0 = 2.8906e-04
Loss = 3.6736e-01, PNorm = 61.1025, GNorm = 1.3846, lr_0 = 2.8886e-04
Loss = 4.0810e-01, PNorm = 61.1040, GNorm = 1.9368, lr_0 = 2.8866e-04
Loss = 4.4494e-01, PNorm = 61.1084, GNorm = 1.3682, lr_0 = 2.8846e-04
Loss = 3.6344e-01, PNorm = 61.1117, GNorm = 1.2645, lr_0 = 2.8826e-04
Loss = 4.5982e-01, PNorm = 61.1173, GNorm = 2.1901, lr_0 = 2.8807e-04
Loss = 4.2077e-01, PNorm = 61.1242, GNorm = 1.3259, lr_0 = 2.8787e-04
Loss = 3.2502e-01, PNorm = 61.1282, GNorm = 1.5717, lr_0 = 2.8767e-04
Loss = 3.8511e-01, PNorm = 61.1305, GNorm = 1.3343, lr_0 = 2.8748e-04
Loss = 3.5775e-01, PNorm = 61.1349, GNorm = 1.4604, lr_0 = 2.8728e-04
Loss = 4.0366e-01, PNorm = 61.1321, GNorm = 1.1726, lr_0 = 2.8708e-04
Loss = 3.9027e-01, PNorm = 61.1374, GNorm = 1.6279, lr_0 = 2.8689e-04
Loss = 3.5414e-01, PNorm = 61.1414, GNorm = 1.1097, lr_0 = 2.8669e-04
Loss = 3.6461e-01, PNorm = 61.1418, GNorm = 1.8739, lr_0 = 2.8649e-04
Loss = 3.7664e-01, PNorm = 61.1454, GNorm = 1.2044, lr_0 = 2.8630e-04
Loss = 3.6557e-01, PNorm = 61.1458, GNorm = 1.5161, lr_0 = 2.8610e-04
Loss = 3.6922e-01, PNorm = 61.1475, GNorm = 1.1202, lr_0 = 2.8590e-04
Loss = 3.7283e-01, PNorm = 61.1508, GNorm = 1.1147, lr_0 = 2.8571e-04
Loss = 3.3871e-01, PNorm = 61.1552, GNorm = 2.8161, lr_0 = 2.8551e-04
Loss = 3.9538e-01, PNorm = 61.1581, GNorm = 1.6528, lr_0 = 2.8532e-04
Loss = 3.5687e-01, PNorm = 61.1632, GNorm = 1.7078, lr_0 = 2.8512e-04
Loss = 3.7916e-01, PNorm = 61.1633, GNorm = 1.4092, lr_0 = 2.8493e-04
Loss = 3.7664e-01, PNorm = 61.1689, GNorm = 1.9117, lr_0 = 2.8473e-04
Loss = 3.6211e-01, PNorm = 61.1788, GNorm = 1.7830, lr_0 = 2.8454e-04
Loss = 4.5169e-01, PNorm = 61.1795, GNorm = 1.9949, lr_0 = 2.8434e-04
Loss = 4.1536e-01, PNorm = 61.1844, GNorm = 2.1539, lr_0 = 2.8415e-04
Loss = 3.5495e-01, PNorm = 61.1880, GNorm = 1.3981, lr_0 = 2.8395e-04
Loss = 3.8258e-01, PNorm = 61.1916, GNorm = 1.0511, lr_0 = 2.8376e-04
Loss = 4.1571e-01, PNorm = 61.1964, GNorm = 2.3302, lr_0 = 2.8356e-04
Loss = 3.3589e-01, PNorm = 61.2001, GNorm = 1.2734, lr_0 = 2.8337e-04
Loss = 3.5666e-01, PNorm = 61.2047, GNorm = 1.3383, lr_0 = 2.8317e-04
Loss = 3.8563e-01, PNorm = 61.2077, GNorm = 1.5463, lr_0 = 2.8298e-04
Loss = 4.7674e-01, PNorm = 61.2122, GNorm = 1.5086, lr_0 = 2.8279e-04
Loss = 3.6605e-01, PNorm = 61.2146, GNorm = 1.0341, lr_0 = 2.8259e-04
Loss = 3.5870e-01, PNorm = 61.2187, GNorm = 0.9977, lr_0 = 2.8240e-04
Loss = 3.8722e-01, PNorm = 61.2181, GNorm = 1.4930, lr_0 = 2.8221e-04
Loss = 3.7746e-01, PNorm = 61.2237, GNorm = 1.4954, lr_0 = 2.8201e-04
Loss = 3.5406e-01, PNorm = 61.2263, GNorm = 2.5724, lr_0 = 2.8182e-04
Loss = 3.6118e-01, PNorm = 61.2328, GNorm = 1.8629, lr_0 = 2.8163e-04
Loss = 4.1184e-01, PNorm = 61.2370, GNorm = 1.2859, lr_0 = 2.8143e-04
Loss = 3.1640e-01, PNorm = 61.2431, GNorm = 1.5400, lr_0 = 2.8124e-04
Loss = 3.4531e-01, PNorm = 61.2452, GNorm = 1.1450, lr_0 = 2.8105e-04
Loss = 3.4442e-01, PNorm = 61.2448, GNorm = 1.4177, lr_0 = 2.8085e-04
Loss = 3.6319e-01, PNorm = 61.2497, GNorm = 1.3585, lr_0 = 2.8066e-04
Loss = 3.2171e-01, PNorm = 61.2541, GNorm = 1.1023, lr_0 = 2.8047e-04
Loss = 3.9792e-01, PNorm = 61.2575, GNorm = 1.5956, lr_0 = 2.8028e-04
Loss = 3.8907e-01, PNorm = 61.2595, GNorm = 1.1450, lr_0 = 2.8009e-04
Loss = 3.6098e-01, PNorm = 61.2627, GNorm = 1.6013, lr_0 = 2.7989e-04
Loss = 3.2883e-01, PNorm = 61.2633, GNorm = 1.3285, lr_0 = 2.7970e-04
Loss = 3.9213e-01, PNorm = 61.2670, GNorm = 1.2504, lr_0 = 2.7951e-04
Loss = 3.4044e-01, PNorm = 61.2752, GNorm = 1.4785, lr_0 = 2.7932e-04
Loss = 4.0254e-01, PNorm = 61.2779, GNorm = 1.3717, lr_0 = 2.7913e-04
Loss = 3.5011e-01, PNorm = 61.2801, GNorm = 1.2316, lr_0 = 2.7894e-04
Loss = 3.1708e-01, PNorm = 61.2827, GNorm = 1.0383, lr_0 = 2.7875e-04
Loss = 4.2382e-01, PNorm = 61.2846, GNorm = 1.5694, lr_0 = 2.7855e-04
Loss = 3.7566e-01, PNorm = 61.2903, GNorm = 1.5089, lr_0 = 2.7836e-04
Loss = 3.9040e-01, PNorm = 61.2920, GNorm = 1.4627, lr_0 = 2.7817e-04
Loss = 3.7481e-01, PNorm = 61.2973, GNorm = 1.0728, lr_0 = 2.7798e-04
Loss = 3.5007e-01, PNorm = 61.3004, GNorm = 1.5047, lr_0 = 2.7779e-04
Loss = 3.7778e-01, PNorm = 61.3048, GNorm = 1.2973, lr_0 = 2.7760e-04
Loss = 3.6652e-01, PNorm = 61.3103, GNorm = 1.4027, lr_0 = 2.7741e-04
Loss = 3.7527e-01, PNorm = 61.3108, GNorm = 1.2197, lr_0 = 2.7722e-04
Loss = 3.5062e-01, PNorm = 61.3148, GNorm = 1.5844, lr_0 = 2.7703e-04
Loss = 3.6018e-01, PNorm = 61.3199, GNorm = 1.6207, lr_0 = 2.7684e-04
Loss = 3.5794e-01, PNorm = 61.3219, GNorm = 1.8245, lr_0 = 2.7665e-04
Loss = 3.8855e-01, PNorm = 61.3267, GNorm = 1.6108, lr_0 = 2.7646e-04
Loss = 3.7493e-01, PNorm = 61.3278, GNorm = 1.1991, lr_0 = 2.7627e-04
Loss = 3.5698e-01, PNorm = 61.3288, GNorm = 1.6957, lr_0 = 2.7608e-04
Loss = 3.8967e-01, PNorm = 61.3344, GNorm = 1.6586, lr_0 = 2.7590e-04
Loss = 3.5315e-01, PNorm = 61.3370, GNorm = 1.1675, lr_0 = 2.7571e-04
Loss = 3.3278e-01, PNorm = 61.3390, GNorm = 1.6010, lr_0 = 2.7552e-04
Loss = 3.5243e-01, PNorm = 61.3420, GNorm = 1.3728, lr_0 = 2.7533e-04
Loss = 4.0321e-01, PNorm = 61.3456, GNorm = 1.0645, lr_0 = 2.7514e-04
Loss = 3.8844e-01, PNorm = 61.3453, GNorm = 1.8783, lr_0 = 2.7495e-04
Loss = 4.0422e-01, PNorm = 61.3484, GNorm = 1.4617, lr_0 = 2.7476e-04
Loss = 3.7193e-01, PNorm = 61.3527, GNorm = 1.5165, lr_0 = 2.7457e-04
Loss = 3.4955e-01, PNorm = 61.3552, GNorm = 1.7394, lr_0 = 2.7439e-04
Loss = 3.7809e-01, PNorm = 61.3552, GNorm = 1.5175, lr_0 = 2.7420e-04
Loss = 3.6362e-01, PNorm = 61.3530, GNorm = 1.4942, lr_0 = 2.7401e-04
Loss = 4.1032e-01, PNorm = 61.3570, GNorm = 1.6696, lr_0 = 2.7382e-04
Loss = 3.5473e-01, PNorm = 61.3581, GNorm = 1.5247, lr_0 = 2.7364e-04
Loss = 3.8522e-01, PNorm = 61.3621, GNorm = 1.4408, lr_0 = 2.7345e-04
Loss = 4.1971e-01, PNorm = 61.3651, GNorm = 1.6572, lr_0 = 2.7326e-04
Loss = 3.4093e-01, PNorm = 61.3670, GNorm = 1.4190, lr_0 = 2.7307e-04
Loss = 3.6696e-01, PNorm = 61.3703, GNorm = 1.2818, lr_0 = 2.7289e-04
Loss = 3.9309e-01, PNorm = 61.3710, GNorm = 2.0478, lr_0 = 2.7270e-04
Loss = 3.5692e-01, PNorm = 61.3760, GNorm = 1.3266, lr_0 = 2.7251e-04
Loss = 4.0950e-01, PNorm = 61.3809, GNorm = 1.4372, lr_0 = 2.7233e-04
Loss = 3.7715e-01, PNorm = 61.3821, GNorm = 1.0885, lr_0 = 2.7214e-04
Loss = 3.6676e-01, PNorm = 61.3851, GNorm = 1.2785, lr_0 = 2.7195e-04
Loss = 3.7861e-01, PNorm = 61.3932, GNorm = 1.0503, lr_0 = 2.7177e-04
Loss = 4.1338e-01, PNorm = 61.3986, GNorm = 1.2075, lr_0 = 2.7158e-04
Loss = 3.6037e-01, PNorm = 61.4035, GNorm = 1.0661, lr_0 = 2.7139e-04
Loss = 3.8175e-01, PNorm = 61.4064, GNorm = 1.1683, lr_0 = 2.7121e-04
Loss = 3.6535e-01, PNorm = 61.4110, GNorm = 2.0744, lr_0 = 2.7102e-04
Loss = 3.6259e-01, PNorm = 61.4119, GNorm = 0.9470, lr_0 = 2.7084e-04
Loss = 3.8900e-01, PNorm = 61.4135, GNorm = 1.9810, lr_0 = 2.7065e-04
Loss = 3.7022e-01, PNorm = 61.4156, GNorm = 0.9839, lr_0 = 2.7047e-04
Loss = 3.3790e-01, PNorm = 61.4206, GNorm = 1.0827, lr_0 = 2.7028e-04
Loss = 4.5899e-01, PNorm = 61.4250, GNorm = 1.3775, lr_0 = 2.7010e-04
Loss = 3.1205e-01, PNorm = 61.4266, GNorm = 1.3820, lr_0 = 2.6991e-04
Loss = 3.9111e-01, PNorm = 61.4320, GNorm = 1.4516, lr_0 = 2.6973e-04
Loss = 3.7839e-01, PNorm = 61.4362, GNorm = 1.5608, lr_0 = 2.6954e-04
Loss = 3.3562e-01, PNorm = 61.4374, GNorm = 1.3479, lr_0 = 2.6936e-04
Loss = 3.4206e-01, PNorm = 61.4375, GNorm = 1.2773, lr_0 = 2.6917e-04
Loss = 3.8642e-01, PNorm = 61.4335, GNorm = 1.4459, lr_0 = 2.6899e-04
Loss = 3.5035e-01, PNorm = 61.4345, GNorm = 1.9581, lr_0 = 2.6880e-04
Loss = 3.5415e-01, PNorm = 61.4363, GNorm = 1.7498, lr_0 = 2.6862e-04
Loss = 3.4675e-01, PNorm = 61.4366, GNorm = 2.0184, lr_0 = 2.6844e-04
Loss = 4.3187e-01, PNorm = 61.4386, GNorm = 1.1084, lr_0 = 2.6825e-04
Validation mae = 0.112538
Epoch 18
Loss = 3.0479e-01, PNorm = 61.4427, GNorm = 1.0912, lr_0 = 2.6807e-04
Loss = 3.7629e-01, PNorm = 61.4464, GNorm = 1.3394, lr_0 = 2.6788e-04
Loss = 4.1361e-01, PNorm = 61.4510, GNorm = 1.5451, lr_0 = 2.6770e-04
Loss = 4.2224e-01, PNorm = 61.4564, GNorm = 1.1725, lr_0 = 2.6752e-04
Loss = 3.7884e-01, PNorm = 61.4606, GNorm = 1.6430, lr_0 = 2.6733e-04
Loss = 3.2684e-01, PNorm = 61.4648, GNorm = 1.7906, lr_0 = 2.6715e-04
Loss = 3.7362e-01, PNorm = 61.4688, GNorm = 1.4288, lr_0 = 2.6697e-04
Loss = 3.6118e-01, PNorm = 61.4698, GNorm = 1.2774, lr_0 = 2.6678e-04
Loss = 3.6742e-01, PNorm = 61.4736, GNorm = 1.4708, lr_0 = 2.6660e-04
Loss = 3.8969e-01, PNorm = 61.4780, GNorm = 1.6664, lr_0 = 2.6642e-04
Loss = 3.6649e-01, PNorm = 61.4822, GNorm = 1.1558, lr_0 = 2.6624e-04
Loss = 3.6861e-01, PNorm = 61.4842, GNorm = 1.3003, lr_0 = 2.6605e-04
Loss = 4.0943e-01, PNorm = 61.4934, GNorm = 1.3097, lr_0 = 2.6587e-04
Loss = 4.2236e-01, PNorm = 61.4967, GNorm = 1.6464, lr_0 = 2.6569e-04
Loss = 3.5361e-01, PNorm = 61.5031, GNorm = 1.2743, lr_0 = 2.6551e-04
Loss = 3.4192e-01, PNorm = 61.5105, GNorm = 2.0488, lr_0 = 2.6533e-04
Loss = 3.4098e-01, PNorm = 61.5122, GNorm = 1.2920, lr_0 = 2.6514e-04
Loss = 3.8943e-01, PNorm = 61.5156, GNorm = 1.4394, lr_0 = 2.6496e-04
Loss = 4.0146e-01, PNorm = 61.5193, GNorm = 1.9883, lr_0 = 2.6478e-04
Loss = 3.5678e-01, PNorm = 61.5195, GNorm = 1.4978, lr_0 = 2.6460e-04
Loss = 3.5618e-01, PNorm = 61.5207, GNorm = 1.4260, lr_0 = 2.6442e-04
Loss = 3.3859e-01, PNorm = 61.5232, GNorm = 1.3992, lr_0 = 2.6424e-04
Loss = 3.6314e-01, PNorm = 61.5286, GNorm = 1.1415, lr_0 = 2.6406e-04
Loss = 3.5427e-01, PNorm = 61.5315, GNorm = 2.5517, lr_0 = 2.6388e-04
Loss = 3.7179e-01, PNorm = 61.5338, GNorm = 1.2541, lr_0 = 2.6369e-04
Loss = 3.3360e-01, PNorm = 61.5364, GNorm = 0.9299, lr_0 = 2.6351e-04
Loss = 4.2594e-01, PNorm = 61.5357, GNorm = 1.7571, lr_0 = 2.6333e-04
Loss = 3.0960e-01, PNorm = 61.5419, GNorm = 1.2753, lr_0 = 2.6315e-04
Loss = 3.7691e-01, PNorm = 61.5459, GNorm = 2.6569, lr_0 = 2.6297e-04
Loss = 3.8675e-01, PNorm = 61.5476, GNorm = 1.3644, lr_0 = 2.6279e-04
Loss = 3.7944e-01, PNorm = 61.5532, GNorm = 1.8788, lr_0 = 2.6261e-04
Loss = 3.4707e-01, PNorm = 61.5548, GNorm = 1.5380, lr_0 = 2.6243e-04
Loss = 4.1876e-01, PNorm = 61.5569, GNorm = 1.4824, lr_0 = 2.6225e-04
Loss = 4.1914e-01, PNorm = 61.5615, GNorm = 1.7101, lr_0 = 2.6207e-04
Loss = 3.8307e-01, PNorm = 61.5635, GNorm = 1.1670, lr_0 = 2.6189e-04
Loss = 3.9887e-01, PNorm = 61.5683, GNorm = 1.6102, lr_0 = 2.6171e-04
Loss = 3.7351e-01, PNorm = 61.5726, GNorm = 1.1679, lr_0 = 2.6153e-04
Loss = 3.9417e-01, PNorm = 61.5758, GNorm = 1.5006, lr_0 = 2.6136e-04
Loss = 4.1558e-01, PNorm = 61.5786, GNorm = 1.4193, lr_0 = 2.6118e-04
Loss = 3.8245e-01, PNorm = 61.5825, GNorm = 1.4060, lr_0 = 2.6100e-04
Loss = 3.8418e-01, PNorm = 61.5847, GNorm = 2.2708, lr_0 = 2.6082e-04
Loss = 3.4212e-01, PNorm = 61.5863, GNorm = 1.1757, lr_0 = 2.6064e-04
Loss = 3.9118e-01, PNorm = 61.5914, GNorm = 1.4016, lr_0 = 2.6046e-04
Loss = 3.3763e-01, PNorm = 61.5929, GNorm = 1.3189, lr_0 = 2.6028e-04
Loss = 3.7613e-01, PNorm = 61.5917, GNorm = 1.5973, lr_0 = 2.6011e-04
Loss = 3.8730e-01, PNorm = 61.5953, GNorm = 1.3285, lr_0 = 2.5993e-04
Loss = 3.8290e-01, PNorm = 61.6010, GNorm = 1.2978, lr_0 = 2.5975e-04
Loss = 3.2297e-01, PNorm = 61.6041, GNorm = 1.2561, lr_0 = 2.5957e-04
Loss = 3.6973e-01, PNorm = 61.6031, GNorm = 2.0179, lr_0 = 2.5939e-04
Loss = 3.4681e-01, PNorm = 61.6078, GNorm = 1.5523, lr_0 = 2.5922e-04
Loss = 3.4530e-01, PNorm = 61.6081, GNorm = 1.3885, lr_0 = 2.5904e-04
Loss = 3.5554e-01, PNorm = 61.6101, GNorm = 1.8550, lr_0 = 2.5886e-04
Loss = 4.0979e-01, PNorm = 61.6151, GNorm = 1.6903, lr_0 = 2.5868e-04
Loss = 3.8369e-01, PNorm = 61.6199, GNorm = 0.9525, lr_0 = 2.5851e-04
Loss = 3.6387e-01, PNorm = 61.6215, GNorm = 1.7130, lr_0 = 2.5833e-04
Loss = 3.5914e-01, PNorm = 61.6211, GNorm = 0.9550, lr_0 = 2.5815e-04
Loss = 4.1120e-01, PNorm = 61.6222, GNorm = 1.7999, lr_0 = 2.5797e-04
Loss = 3.7764e-01, PNorm = 61.6236, GNorm = 1.2899, lr_0 = 2.5780e-04
Loss = 3.2485e-01, PNorm = 61.6283, GNorm = 1.1701, lr_0 = 2.5762e-04
Loss = 3.3826e-01, PNorm = 61.6266, GNorm = 1.1273, lr_0 = 2.5745e-04
Loss = 4.3810e-01, PNorm = 61.6326, GNorm = 1.7941, lr_0 = 2.5727e-04
Loss = 3.6345e-01, PNorm = 61.6383, GNorm = 1.5543, lr_0 = 2.5709e-04
Loss = 3.6872e-01, PNorm = 61.6436, GNorm = 1.6348, lr_0 = 2.5692e-04
Loss = 4.2984e-01, PNorm = 61.6458, GNorm = 1.4003, lr_0 = 2.5674e-04
Loss = 3.8035e-01, PNorm = 61.6487, GNorm = 1.2440, lr_0 = 2.5656e-04
Loss = 3.6449e-01, PNorm = 61.6507, GNorm = 1.4025, lr_0 = 2.5639e-04
Loss = 3.6420e-01, PNorm = 61.6559, GNorm = 1.3211, lr_0 = 2.5621e-04
Loss = 4.2770e-01, PNorm = 61.6565, GNorm = 2.3017, lr_0 = 2.5604e-04
Loss = 3.5836e-01, PNorm = 61.6575, GNorm = 1.1707, lr_0 = 2.5586e-04
Loss = 4.5310e-01, PNorm = 61.6619, GNorm = 1.7973, lr_0 = 2.5569e-04
Loss = 3.2280e-01, PNorm = 61.6664, GNorm = 0.9270, lr_0 = 2.5551e-04
Loss = 3.3518e-01, PNorm = 61.6702, GNorm = 2.2805, lr_0 = 2.5534e-04
Loss = 3.4172e-01, PNorm = 61.6730, GNorm = 1.2476, lr_0 = 2.5516e-04
Loss = 3.8350e-01, PNorm = 61.6796, GNorm = 1.4190, lr_0 = 2.5499e-04
Loss = 3.8308e-01, PNorm = 61.6811, GNorm = 1.6560, lr_0 = 2.5481e-04
Loss = 3.3560e-01, PNorm = 61.6833, GNorm = 1.4941, lr_0 = 2.5464e-04
Loss = 4.3084e-01, PNorm = 61.6885, GNorm = 2.0672, lr_0 = 2.5446e-04
Loss = 3.2978e-01, PNorm = 61.6934, GNorm = 2.0271, lr_0 = 2.5429e-04
Loss = 3.6122e-01, PNorm = 61.6959, GNorm = 1.3379, lr_0 = 2.5411e-04
Loss = 3.5349e-01, PNorm = 61.6993, GNorm = 1.2352, lr_0 = 2.5394e-04
Loss = 3.2270e-01, PNorm = 61.6994, GNorm = 1.1384, lr_0 = 2.5377e-04
Loss = 3.2564e-01, PNorm = 61.7025, GNorm = 1.2361, lr_0 = 2.5359e-04
Loss = 4.0390e-01, PNorm = 61.7058, GNorm = 2.1315, lr_0 = 2.5342e-04
Loss = 3.7272e-01, PNorm = 61.7079, GNorm = 1.3917, lr_0 = 2.5325e-04
Loss = 3.2915e-01, PNorm = 61.7093, GNorm = 1.3629, lr_0 = 2.5307e-04
Loss = 3.9799e-01, PNorm = 61.7115, GNorm = 4.6366, lr_0 = 2.5290e-04
Loss = 3.8162e-01, PNorm = 61.7120, GNorm = 2.6606, lr_0 = 2.5273e-04
Loss = 3.7114e-01, PNorm = 61.7141, GNorm = 1.3414, lr_0 = 2.5255e-04
Loss = 4.5404e-01, PNorm = 61.7239, GNorm = 1.0399, lr_0 = 2.5238e-04
Loss = 3.9522e-01, PNorm = 61.7291, GNorm = 1.3137, lr_0 = 2.5221e-04
Loss = 3.6837e-01, PNorm = 61.7328, GNorm = 1.2560, lr_0 = 2.5203e-04
Loss = 3.7669e-01, PNorm = 61.7351, GNorm = 1.4576, lr_0 = 2.5186e-04
Loss = 4.5622e-01, PNorm = 61.7380, GNorm = 1.8872, lr_0 = 2.5169e-04
Loss = 3.8931e-01, PNorm = 61.7404, GNorm = 1.6414, lr_0 = 2.5152e-04
Loss = 3.7421e-01, PNorm = 61.7417, GNorm = 1.0693, lr_0 = 2.5134e-04
Loss = 3.9151e-01, PNorm = 61.7454, GNorm = 1.1004, lr_0 = 2.5117e-04
Loss = 3.3480e-01, PNorm = 61.7514, GNorm = 1.3323, lr_0 = 2.5100e-04
Loss = 3.5642e-01, PNorm = 61.7532, GNorm = 1.3572, lr_0 = 2.5083e-04
Loss = 3.5069e-01, PNorm = 61.7541, GNorm = 1.7759, lr_0 = 2.5066e-04
Loss = 3.9707e-01, PNorm = 61.7568, GNorm = 1.6512, lr_0 = 2.5048e-04
Loss = 3.4607e-01, PNorm = 61.7614, GNorm = 1.2449, lr_0 = 2.5031e-04
Loss = 3.6146e-01, PNorm = 61.7632, GNorm = 2.5609, lr_0 = 2.5014e-04
Loss = 3.9610e-01, PNorm = 61.7638, GNorm = 1.3296, lr_0 = 2.4997e-04
Loss = 3.2421e-01, PNorm = 61.7665, GNorm = 1.2892, lr_0 = 2.4980e-04
Loss = 3.5124e-01, PNorm = 61.7704, GNorm = 1.6839, lr_0 = 2.4963e-04
Loss = 4.0119e-01, PNorm = 61.7779, GNorm = 1.7394, lr_0 = 2.4946e-04
Loss = 4.0564e-01, PNorm = 61.7793, GNorm = 1.3010, lr_0 = 2.4929e-04
Loss = 3.4423e-01, PNorm = 61.7802, GNorm = 2.0852, lr_0 = 2.4911e-04
Loss = 3.5298e-01, PNorm = 61.7834, GNorm = 1.2844, lr_0 = 2.4894e-04
Loss = 3.6458e-01, PNorm = 61.7858, GNorm = 1.5596, lr_0 = 2.4877e-04
Loss = 3.6184e-01, PNorm = 61.7873, GNorm = 1.6337, lr_0 = 2.4860e-04
Loss = 3.9049e-01, PNorm = 61.7887, GNorm = 1.4337, lr_0 = 2.4843e-04
Loss = 3.3979e-01, PNorm = 61.7923, GNorm = 1.8581, lr_0 = 2.4826e-04
Loss = 4.0355e-01, PNorm = 61.7969, GNorm = 1.6486, lr_0 = 2.4809e-04
Loss = 3.6181e-01, PNorm = 61.7986, GNorm = 1.3619, lr_0 = 2.4792e-04
Loss = 3.2892e-01, PNorm = 61.8007, GNorm = 1.7417, lr_0 = 2.4775e-04
Loss = 4.3121e-01, PNorm = 61.8042, GNorm = 1.0340, lr_0 = 2.4758e-04
Loss = 3.9258e-01, PNorm = 61.8075, GNorm = 1.5411, lr_0 = 2.4741e-04
Loss = 3.6032e-01, PNorm = 61.8123, GNorm = 1.1304, lr_0 = 2.4724e-04
Loss = 3.4518e-01, PNorm = 61.8138, GNorm = 1.0633, lr_0 = 2.4707e-04
Validation mae = 0.112156
Epoch 19
Loss = 3.8795e-01, PNorm = 61.8156, GNorm = 1.7550, lr_0 = 2.4690e-04
Loss = 3.6936e-01, PNorm = 61.8198, GNorm = 1.9688, lr_0 = 2.4674e-04
Loss = 3.5238e-01, PNorm = 61.8222, GNorm = 1.0241, lr_0 = 2.4657e-04
Loss = 3.8848e-01, PNorm = 61.8257, GNorm = 1.7681, lr_0 = 2.4640e-04
Loss = 3.8251e-01, PNorm = 61.8321, GNorm = 1.9084, lr_0 = 2.4623e-04
Loss = 3.4798e-01, PNorm = 61.8392, GNorm = 1.5973, lr_0 = 2.4606e-04
Loss = 3.5269e-01, PNorm = 61.8410, GNorm = 2.5367, lr_0 = 2.4589e-04
Loss = 3.5227e-01, PNorm = 61.8394, GNorm = 1.2867, lr_0 = 2.4572e-04
Loss = 3.5866e-01, PNorm = 61.8411, GNorm = 1.4389, lr_0 = 2.4556e-04
Loss = 3.4118e-01, PNorm = 61.8483, GNorm = 1.2305, lr_0 = 2.4539e-04
Loss = 3.6775e-01, PNorm = 61.8543, GNorm = 1.6205, lr_0 = 2.4522e-04
Loss = 3.2816e-01, PNorm = 61.8545, GNorm = 1.3218, lr_0 = 2.4505e-04
Loss = 3.7541e-01, PNorm = 61.8567, GNorm = 1.2239, lr_0 = 2.4488e-04
Loss = 3.8858e-01, PNorm = 61.8619, GNorm = 1.5684, lr_0 = 2.4472e-04
Loss = 3.4492e-01, PNorm = 61.8657, GNorm = 1.7678, lr_0 = 2.4455e-04
Loss = 3.4686e-01, PNorm = 61.8649, GNorm = 1.3808, lr_0 = 2.4438e-04
Loss = 3.3835e-01, PNorm = 61.8705, GNorm = 1.0369, lr_0 = 2.4421e-04
Loss = 3.3444e-01, PNorm = 61.8771, GNorm = 1.6642, lr_0 = 2.4405e-04
Loss = 3.3177e-01, PNorm = 61.8811, GNorm = 1.4133, lr_0 = 2.4388e-04
Loss = 3.8620e-01, PNorm = 61.8802, GNorm = 1.2239, lr_0 = 2.4371e-04
Loss = 3.4437e-01, PNorm = 61.8829, GNorm = 1.5738, lr_0 = 2.4354e-04
Loss = 3.5848e-01, PNorm = 61.8878, GNorm = 2.0479, lr_0 = 2.4338e-04
Loss = 3.3793e-01, PNorm = 61.8879, GNorm = 1.3099, lr_0 = 2.4321e-04
Loss = 3.3690e-01, PNorm = 61.8911, GNorm = 1.4020, lr_0 = 2.4304e-04
Loss = 3.6260e-01, PNorm = 61.8931, GNorm = 1.3555, lr_0 = 2.4288e-04
Loss = 3.4809e-01, PNorm = 61.8944, GNorm = 1.4140, lr_0 = 2.4271e-04
Loss = 3.6923e-01, PNorm = 61.8976, GNorm = 1.5451, lr_0 = 2.4254e-04
Loss = 3.7799e-01, PNorm = 61.8992, GNorm = 1.6916, lr_0 = 2.4238e-04
Loss = 3.8119e-01, PNorm = 61.9008, GNorm = 1.1514, lr_0 = 2.4221e-04
Loss = 3.2330e-01, PNorm = 61.9054, GNorm = 1.0521, lr_0 = 2.4205e-04
Loss = 4.1754e-01, PNorm = 61.9056, GNorm = 1.0682, lr_0 = 2.4188e-04
Loss = 3.4821e-01, PNorm = 61.9073, GNorm = 1.5733, lr_0 = 2.4171e-04
Loss = 3.9186e-01, PNorm = 61.9136, GNorm = 1.2164, lr_0 = 2.4155e-04
Loss = 3.6650e-01, PNorm = 61.9175, GNorm = 2.0266, lr_0 = 2.4138e-04
Loss = 4.2338e-01, PNorm = 61.9215, GNorm = 1.6417, lr_0 = 2.4122e-04
Loss = 3.8870e-01, PNorm = 61.9241, GNorm = 1.5548, lr_0 = 2.4105e-04
Loss = 3.5094e-01, PNorm = 61.9270, GNorm = 1.6800, lr_0 = 2.4089e-04
Loss = 3.3199e-01, PNorm = 61.9287, GNorm = 1.7662, lr_0 = 2.4072e-04
Loss = 3.5684e-01, PNorm = 61.9307, GNorm = 1.9692, lr_0 = 2.4056e-04
Loss = 3.6845e-01, PNorm = 61.9338, GNorm = 1.4329, lr_0 = 2.4039e-04
Loss = 4.1662e-01, PNorm = 61.9402, GNorm = 1.7991, lr_0 = 2.4023e-04
Loss = 3.9044e-01, PNorm = 61.9428, GNorm = 1.2535, lr_0 = 2.4006e-04
Loss = 3.8036e-01, PNorm = 61.9457, GNorm = 1.4840, lr_0 = 2.3990e-04
Loss = 3.7581e-01, PNorm = 61.9499, GNorm = 1.1924, lr_0 = 2.3974e-04
Loss = 3.7032e-01, PNorm = 61.9508, GNorm = 1.5176, lr_0 = 2.3957e-04
Loss = 3.4555e-01, PNorm = 61.9540, GNorm = 1.4953, lr_0 = 2.3941e-04
Loss = 3.7403e-01, PNorm = 61.9558, GNorm = 1.4000, lr_0 = 2.3924e-04
Loss = 4.0954e-01, PNorm = 61.9577, GNorm = 1.5276, lr_0 = 2.3908e-04
Loss = 3.3217e-01, PNorm = 61.9603, GNorm = 1.1560, lr_0 = 2.3892e-04
Loss = 3.4930e-01, PNorm = 61.9643, GNorm = 1.2853, lr_0 = 2.3875e-04
Loss = 3.8830e-01, PNorm = 61.9691, GNorm = 1.3012, lr_0 = 2.3859e-04
Loss = 3.4236e-01, PNorm = 61.9721, GNorm = 1.1991, lr_0 = 2.3842e-04
Loss = 3.5117e-01, PNorm = 61.9770, GNorm = 1.2826, lr_0 = 2.3826e-04
Loss = 3.4217e-01, PNorm = 61.9793, GNorm = 1.5166, lr_0 = 2.3810e-04
Loss = 3.7733e-01, PNorm = 61.9815, GNorm = 1.5213, lr_0 = 2.3794e-04
Loss = 3.5828e-01, PNorm = 61.9833, GNorm = 2.3206, lr_0 = 2.3777e-04
Loss = 3.1201e-01, PNorm = 61.9867, GNorm = 1.2088, lr_0 = 2.3761e-04
Loss = 4.0058e-01, PNorm = 61.9865, GNorm = 1.9364, lr_0 = 2.3745e-04
Loss = 3.5809e-01, PNorm = 61.9885, GNorm = 1.6900, lr_0 = 2.3728e-04
Loss = 3.8584e-01, PNorm = 61.9883, GNorm = 1.4702, lr_0 = 2.3712e-04
Loss = 3.3369e-01, PNorm = 61.9891, GNorm = 1.5135, lr_0 = 2.3696e-04
Loss = 3.5534e-01, PNorm = 61.9892, GNorm = 1.3606, lr_0 = 2.3680e-04
Loss = 3.8530e-01, PNorm = 61.9883, GNorm = 1.5702, lr_0 = 2.3663e-04
Loss = 3.6231e-01, PNorm = 61.9918, GNorm = 1.3025, lr_0 = 2.3647e-04
Loss = 3.8910e-01, PNorm = 61.9958, GNorm = 2.1323, lr_0 = 2.3631e-04
Loss = 4.5453e-01, PNorm = 61.9979, GNorm = 1.6790, lr_0 = 2.3615e-04
Loss = 3.5627e-01, PNorm = 62.0006, GNorm = 1.4448, lr_0 = 2.3599e-04
Loss = 4.1532e-01, PNorm = 62.0014, GNorm = 1.2182, lr_0 = 2.3582e-04
Loss = 3.6019e-01, PNorm = 62.0032, GNorm = 1.1878, lr_0 = 2.3566e-04
Loss = 3.3981e-01, PNorm = 62.0044, GNorm = 1.4695, lr_0 = 2.3550e-04
Loss = 3.7159e-01, PNorm = 62.0069, GNorm = 1.2585, lr_0 = 2.3534e-04
Loss = 3.4824e-01, PNorm = 62.0069, GNorm = 1.3430, lr_0 = 2.3518e-04
Loss = 4.1463e-01, PNorm = 62.0079, GNorm = 1.6761, lr_0 = 2.3502e-04
Loss = 4.0764e-01, PNorm = 62.0108, GNorm = 1.5922, lr_0 = 2.3486e-04
Loss = 4.3564e-01, PNorm = 62.0116, GNorm = 1.9570, lr_0 = 2.3470e-04
Loss = 4.1846e-01, PNorm = 62.0164, GNorm = 1.8155, lr_0 = 2.3454e-04
Loss = 3.8916e-01, PNorm = 62.0192, GNorm = 1.1332, lr_0 = 2.3437e-04
Loss = 3.9796e-01, PNorm = 62.0190, GNorm = 1.6126, lr_0 = 2.3421e-04
Loss = 4.0679e-01, PNorm = 62.0244, GNorm = 1.0860, lr_0 = 2.3405e-04
Loss = 3.6031e-01, PNorm = 62.0285, GNorm = 2.1624, lr_0 = 2.3389e-04
Loss = 3.4398e-01, PNorm = 62.0300, GNorm = 1.6062, lr_0 = 2.3373e-04
Loss = 3.5482e-01, PNorm = 62.0328, GNorm = 1.2449, lr_0 = 2.3357e-04
Loss = 3.3669e-01, PNorm = 62.0335, GNorm = 1.1290, lr_0 = 2.3341e-04
Loss = 3.6067e-01, PNorm = 62.0360, GNorm = 1.0991, lr_0 = 2.3325e-04
Loss = 3.2704e-01, PNorm = 62.0369, GNorm = 2.1425, lr_0 = 2.3309e-04
Loss = 3.5649e-01, PNorm = 62.0423, GNorm = 1.1562, lr_0 = 2.3293e-04
Loss = 3.8033e-01, PNorm = 62.0442, GNorm = 1.1533, lr_0 = 2.3277e-04
Loss = 3.5052e-01, PNorm = 62.0480, GNorm = 1.8409, lr_0 = 2.3261e-04
Loss = 3.5155e-01, PNorm = 62.0503, GNorm = 1.6228, lr_0 = 2.3246e-04
Loss = 3.7913e-01, PNorm = 62.0506, GNorm = 1.3314, lr_0 = 2.3230e-04
Loss = 3.1934e-01, PNorm = 62.0530, GNorm = 1.3980, lr_0 = 2.3214e-04
Loss = 3.7236e-01, PNorm = 62.0591, GNorm = 1.2100, lr_0 = 2.3198e-04
Loss = 4.2163e-01, PNorm = 62.0652, GNorm = 1.6250, lr_0 = 2.3182e-04
Loss = 3.9146e-01, PNorm = 62.0673, GNorm = 1.2561, lr_0 = 2.3166e-04
Loss = 3.5928e-01, PNorm = 62.0687, GNorm = 1.1444, lr_0 = 2.3150e-04
Loss = 3.3348e-01, PNorm = 62.0719, GNorm = 1.7916, lr_0 = 2.3134e-04
Loss = 3.5353e-01, PNorm = 62.0738, GNorm = 1.3958, lr_0 = 2.3118e-04
Loss = 3.8945e-01, PNorm = 62.0744, GNorm = 1.2676, lr_0 = 2.3103e-04
Loss = 3.7750e-01, PNorm = 62.0773, GNorm = 1.6475, lr_0 = 2.3087e-04
Loss = 3.4622e-01, PNorm = 62.0793, GNorm = 1.2847, lr_0 = 2.3071e-04
Loss = 3.7371e-01, PNorm = 62.0807, GNorm = 1.1978, lr_0 = 2.3055e-04
Loss = 3.8332e-01, PNorm = 62.0828, GNorm = 1.3572, lr_0 = 2.3039e-04
Loss = 3.7020e-01, PNorm = 62.0863, GNorm = 1.2400, lr_0 = 2.3024e-04
Loss = 3.7223e-01, PNorm = 62.0877, GNorm = 1.5790, lr_0 = 2.3008e-04
Loss = 3.7387e-01, PNorm = 62.0915, GNorm = 1.2450, lr_0 = 2.2992e-04
Loss = 3.7624e-01, PNorm = 62.0921, GNorm = 1.3226, lr_0 = 2.2976e-04
Loss = 4.0025e-01, PNorm = 62.0939, GNorm = 1.4629, lr_0 = 2.2961e-04
Loss = 3.8055e-01, PNorm = 62.0968, GNorm = 1.6808, lr_0 = 2.2945e-04
Loss = 3.6852e-01, PNorm = 62.0962, GNorm = 1.7873, lr_0 = 2.2929e-04
Loss = 2.9171e-01, PNorm = 62.0978, GNorm = 1.0554, lr_0 = 2.2913e-04
Loss = 4.0753e-01, PNorm = 62.1010, GNorm = 2.2794, lr_0 = 2.2898e-04
Loss = 3.6593e-01, PNorm = 62.1034, GNorm = 1.4966, lr_0 = 2.2882e-04
Loss = 3.8116e-01, PNorm = 62.1054, GNorm = 1.8345, lr_0 = 2.2866e-04
Loss = 4.1907e-01, PNorm = 62.1075, GNorm = 1.2653, lr_0 = 2.2851e-04
Loss = 3.5385e-01, PNorm = 62.1117, GNorm = 2.4260, lr_0 = 2.2835e-04
Loss = 3.8646e-01, PNorm = 62.1124, GNorm = 1.7489, lr_0 = 2.2819e-04
Loss = 3.4451e-01, PNorm = 62.1165, GNorm = 1.2014, lr_0 = 2.2804e-04
Loss = 4.0010e-01, PNorm = 62.1193, GNorm = 1.6981, lr_0 = 2.2788e-04
Loss = 3.2121e-01, PNorm = 62.1207, GNorm = 1.5031, lr_0 = 2.2773e-04
Loss = 3.7890e-01, PNorm = 62.1220, GNorm = 1.2619, lr_0 = 2.2757e-04
Validation mae = 0.111452
Epoch 20
Loss = 3.7304e-01, PNorm = 62.1230, GNorm = 1.0813, lr_0 = 2.2741e-04
Loss = 3.0420e-01, PNorm = 62.1279, GNorm = 1.5474, lr_0 = 2.2726e-04
Loss = 3.6119e-01, PNorm = 62.1319, GNorm = 1.2194, lr_0 = 2.2710e-04
Loss = 3.7251e-01, PNorm = 62.1355, GNorm = 1.3714, lr_0 = 2.2695e-04
Loss = 3.4547e-01, PNorm = 62.1391, GNorm = 1.2163, lr_0 = 2.2679e-04
Loss = 4.2107e-01, PNorm = 62.1437, GNorm = 2.5538, lr_0 = 2.2664e-04
Loss = 3.3865e-01, PNorm = 62.1464, GNorm = 1.1742, lr_0 = 2.2648e-04
Loss = 3.5955e-01, PNorm = 62.1486, GNorm = 1.4943, lr_0 = 2.2632e-04
Loss = 3.7430e-01, PNorm = 62.1511, GNorm = 1.8683, lr_0 = 2.2617e-04
Loss = 3.6639e-01, PNorm = 62.1523, GNorm = 1.5868, lr_0 = 2.2601e-04
Loss = 3.8275e-01, PNorm = 62.1532, GNorm = 1.5006, lr_0 = 2.2586e-04
Loss = 3.8202e-01, PNorm = 62.1552, GNorm = 3.3296, lr_0 = 2.2571e-04
Loss = 3.6671e-01, PNorm = 62.1560, GNorm = 1.7409, lr_0 = 2.2555e-04
Loss = 3.6548e-01, PNorm = 62.1592, GNorm = 1.8258, lr_0 = 2.2540e-04
Loss = 3.8008e-01, PNorm = 62.1621, GNorm = 1.1072, lr_0 = 2.2524e-04
Loss = 3.8720e-01, PNorm = 62.1655, GNorm = 2.2794, lr_0 = 2.2509e-04
Loss = 3.7483e-01, PNorm = 62.1687, GNorm = 1.4589, lr_0 = 2.2493e-04
Loss = 3.4476e-01, PNorm = 62.1708, GNorm = 1.4495, lr_0 = 2.2478e-04
Loss = 2.9767e-01, PNorm = 62.1704, GNorm = 1.3231, lr_0 = 2.2463e-04
Loss = 2.9977e-01, PNorm = 62.1722, GNorm = 1.9095, lr_0 = 2.2447e-04
Loss = 3.5840e-01, PNorm = 62.1746, GNorm = 2.6004, lr_0 = 2.2432e-04
Loss = 4.1001e-01, PNorm = 62.1774, GNorm = 2.1321, lr_0 = 2.2416e-04
Loss = 3.4212e-01, PNorm = 62.1812, GNorm = 1.6652, lr_0 = 2.2401e-04
Loss = 3.4631e-01, PNorm = 62.1819, GNorm = 1.2676, lr_0 = 2.2386e-04
Loss = 3.6321e-01, PNorm = 62.1827, GNorm = 1.6314, lr_0 = 2.2370e-04
Loss = 3.8697e-01, PNorm = 62.1894, GNorm = 1.4906, lr_0 = 2.2355e-04
Loss = 3.3402e-01, PNorm = 62.1941, GNorm = 1.7759, lr_0 = 2.2340e-04
Loss = 3.5885e-01, PNorm = 62.1953, GNorm = 1.6855, lr_0 = 2.2324e-04
Loss = 3.9503e-01, PNorm = 62.1990, GNorm = 1.3374, lr_0 = 2.2309e-04
Loss = 3.4731e-01, PNorm = 62.2025, GNorm = 1.1555, lr_0 = 2.2294e-04
Loss = 4.1918e-01, PNorm = 62.2053, GNorm = 2.2940, lr_0 = 2.2279e-04
Loss = 3.6223e-01, PNorm = 62.2090, GNorm = 1.3419, lr_0 = 2.2263e-04
Loss = 3.5232e-01, PNorm = 62.2093, GNorm = 1.9679, lr_0 = 2.2248e-04
Loss = 4.0262e-01, PNorm = 62.2117, GNorm = 1.6881, lr_0 = 2.2233e-04
Loss = 3.8139e-01, PNorm = 62.2161, GNorm = 1.3443, lr_0 = 2.2218e-04
Loss = 3.6141e-01, PNorm = 62.2153, GNorm = 1.3932, lr_0 = 2.2202e-04
Loss = 4.2824e-01, PNorm = 62.2167, GNorm = 1.1608, lr_0 = 2.2187e-04
Loss = 3.1459e-01, PNorm = 62.2199, GNorm = 1.5374, lr_0 = 2.2172e-04
Loss = 3.3323e-01, PNorm = 62.2216, GNorm = 1.5248, lr_0 = 2.2157e-04
Loss = 3.8862e-01, PNorm = 62.2219, GNorm = 1.9284, lr_0 = 2.2142e-04
Loss = 3.8123e-01, PNorm = 62.2257, GNorm = 1.4912, lr_0 = 2.2126e-04
Loss = 4.1319e-01, PNorm = 62.2295, GNorm = 1.5886, lr_0 = 2.2111e-04
Loss = 3.3892e-01, PNorm = 62.2316, GNorm = 1.5401, lr_0 = 2.2096e-04
Loss = 3.7878e-01, PNorm = 62.2360, GNorm = 1.1318, lr_0 = 2.2081e-04
Loss = 3.8097e-01, PNorm = 62.2393, GNorm = 1.2363, lr_0 = 2.2066e-04
Loss = 3.6426e-01, PNorm = 62.2393, GNorm = 1.4040, lr_0 = 2.2051e-04
Loss = 3.7197e-01, PNorm = 62.2418, GNorm = 1.6978, lr_0 = 2.2036e-04
Loss = 3.7659e-01, PNorm = 62.2448, GNorm = 1.4892, lr_0 = 2.2021e-04
Loss = 3.6901e-01, PNorm = 62.2472, GNorm = 1.4467, lr_0 = 2.2005e-04
Loss = 3.5322e-01, PNorm = 62.2507, GNorm = 1.5444, lr_0 = 2.1990e-04
Loss = 3.3097e-01, PNorm = 62.2543, GNorm = 1.7072, lr_0 = 2.1975e-04
Loss = 3.6778e-01, PNorm = 62.2556, GNorm = 1.2641, lr_0 = 2.1960e-04
Loss = 3.7059e-01, PNorm = 62.2595, GNorm = 1.3895, lr_0 = 2.1945e-04
Loss = 3.5964e-01, PNorm = 62.2599, GNorm = 1.5676, lr_0 = 2.1930e-04
Loss = 3.5190e-01, PNorm = 62.2632, GNorm = 1.3369, lr_0 = 2.1915e-04
Loss = 3.8953e-01, PNorm = 62.2659, GNorm = 1.2706, lr_0 = 2.1900e-04
Loss = 3.7585e-01, PNorm = 62.2636, GNorm = 1.1518, lr_0 = 2.1885e-04
Loss = 3.9955e-01, PNorm = 62.2685, GNorm = 1.7043, lr_0 = 2.1870e-04
Loss = 3.3814e-01, PNorm = 62.2735, GNorm = 1.1072, lr_0 = 2.1855e-04
Loss = 3.4645e-01, PNorm = 62.2750, GNorm = 1.2246, lr_0 = 2.1840e-04
Loss = 3.7869e-01, PNorm = 62.2781, GNorm = 1.6066, lr_0 = 2.1825e-04
Loss = 3.4826e-01, PNorm = 62.2823, GNorm = 1.2611, lr_0 = 2.1810e-04
Loss = 3.7186e-01, PNorm = 62.2852, GNorm = 1.0522, lr_0 = 2.1795e-04
Loss = 3.3480e-01, PNorm = 62.2881, GNorm = 1.5535, lr_0 = 2.1780e-04
Loss = 3.4359e-01, PNorm = 62.2901, GNorm = 1.2420, lr_0 = 2.1765e-04
Loss = 4.0196e-01, PNorm = 62.2940, GNorm = 1.2601, lr_0 = 2.1751e-04
Loss = 4.2738e-01, PNorm = 62.2975, GNorm = 1.5734, lr_0 = 2.1736e-04
Loss = 3.4330e-01, PNorm = 62.3000, GNorm = 1.4332, lr_0 = 2.1721e-04
Loss = 3.8075e-01, PNorm = 62.2998, GNorm = 1.2486, lr_0 = 2.1706e-04
Loss = 4.2694e-01, PNorm = 62.3024, GNorm = 2.1474, lr_0 = 2.1691e-04
Loss = 3.4005e-01, PNorm = 62.3081, GNorm = 1.6914, lr_0 = 2.1676e-04
Loss = 3.3920e-01, PNorm = 62.3121, GNorm = 1.3762, lr_0 = 2.1661e-04
Loss = 3.7377e-01, PNorm = 62.3127, GNorm = 1.2158, lr_0 = 2.1646e-04
Loss = 3.2992e-01, PNorm = 62.3157, GNorm = 1.4401, lr_0 = 2.1632e-04
Loss = 3.5890e-01, PNorm = 62.3187, GNorm = 1.4107, lr_0 = 2.1617e-04
Loss = 3.9016e-01, PNorm = 62.3169, GNorm = 1.5868, lr_0 = 2.1602e-04
Loss = 3.6066e-01, PNorm = 62.3207, GNorm = 1.5423, lr_0 = 2.1587e-04
Loss = 3.8172e-01, PNorm = 62.3208, GNorm = 1.3484, lr_0 = 2.1572e-04
Loss = 3.7086e-01, PNorm = 62.3199, GNorm = 1.4923, lr_0 = 2.1558e-04
Loss = 3.1163e-01, PNorm = 62.3222, GNorm = 1.1017, lr_0 = 2.1543e-04
Loss = 3.6454e-01, PNorm = 62.3235, GNorm = 1.5851, lr_0 = 2.1528e-04
Loss = 3.6046e-01, PNorm = 62.3262, GNorm = 1.2034, lr_0 = 2.1513e-04
Loss = 3.7687e-01, PNorm = 62.3308, GNorm = 1.4208, lr_0 = 2.1499e-04
Loss = 3.9337e-01, PNorm = 62.3350, GNorm = 1.4410, lr_0 = 2.1484e-04
Loss = 3.9220e-01, PNorm = 62.3373, GNorm = 1.4947, lr_0 = 2.1469e-04
Loss = 3.7191e-01, PNorm = 62.3381, GNorm = 1.4017, lr_0 = 2.1454e-04
Loss = 3.5506e-01, PNorm = 62.3403, GNorm = 1.8179, lr_0 = 2.1440e-04
Loss = 3.4640e-01, PNorm = 62.3432, GNorm = 1.6962, lr_0 = 2.1425e-04
Loss = 3.8810e-01, PNorm = 62.3473, GNorm = 1.6760, lr_0 = 2.1410e-04
Loss = 3.2311e-01, PNorm = 62.3509, GNorm = 1.8035, lr_0 = 2.1396e-04
Loss = 3.6490e-01, PNorm = 62.3508, GNorm = 2.0069, lr_0 = 2.1381e-04
Loss = 3.6985e-01, PNorm = 62.3496, GNorm = 2.3936, lr_0 = 2.1366e-04
Loss = 3.5790e-01, PNorm = 62.3540, GNorm = 1.5228, lr_0 = 2.1352e-04
Loss = 3.6254e-01, PNorm = 62.3546, GNorm = 1.0923, lr_0 = 2.1337e-04
Loss = 3.0636e-01, PNorm = 62.3558, GNorm = 1.3870, lr_0 = 2.1323e-04
Loss = 3.7256e-01, PNorm = 62.3588, GNorm = 1.2644, lr_0 = 2.1308e-04
Loss = 3.3195e-01, PNorm = 62.3606, GNorm = 1.3408, lr_0 = 2.1293e-04
Loss = 3.1994e-01, PNorm = 62.3640, GNorm = 1.0425, lr_0 = 2.1279e-04
Loss = 3.5617e-01, PNorm = 62.3662, GNorm = 1.5668, lr_0 = 2.1264e-04
Loss = 3.2160e-01, PNorm = 62.3704, GNorm = 1.7547, lr_0 = 2.1250e-04
Loss = 3.3619e-01, PNorm = 62.3722, GNorm = 1.3340, lr_0 = 2.1235e-04
Loss = 3.7702e-01, PNorm = 62.3747, GNorm = 1.7289, lr_0 = 2.1221e-04
Loss = 3.4729e-01, PNorm = 62.3777, GNorm = 1.2676, lr_0 = 2.1206e-04
Loss = 3.5549e-01, PNorm = 62.3799, GNorm = 1.0803, lr_0 = 2.1191e-04
Loss = 4.9733e-01, PNorm = 62.3848, GNorm = 1.6580, lr_0 = 2.1177e-04
Loss = 3.7032e-01, PNorm = 62.3902, GNorm = 1.4693, lr_0 = 2.1162e-04
Loss = 3.5374e-01, PNorm = 62.3933, GNorm = 1.5867, lr_0 = 2.1148e-04
Loss = 3.6391e-01, PNorm = 62.3934, GNorm = 1.7826, lr_0 = 2.1133e-04
Loss = 3.6937e-01, PNorm = 62.3976, GNorm = 0.9141, lr_0 = 2.1119e-04
Loss = 4.0237e-01, PNorm = 62.3991, GNorm = 1.5085, lr_0 = 2.1104e-04
Loss = 3.4808e-01, PNorm = 62.4007, GNorm = 1.2136, lr_0 = 2.1090e-04
Loss = 3.8549e-01, PNorm = 62.4023, GNorm = 1.3659, lr_0 = 2.1076e-04
Loss = 3.4243e-01, PNorm = 62.4003, GNorm = 1.1559, lr_0 = 2.1061e-04
Loss = 3.9024e-01, PNorm = 62.4006, GNorm = 1.2810, lr_0 = 2.1047e-04
Loss = 3.9054e-01, PNorm = 62.3993, GNorm = 1.7995, lr_0 = 2.1032e-04
Loss = 3.6124e-01, PNorm = 62.4016, GNorm = 1.9068, lr_0 = 2.1018e-04
Loss = 3.6821e-01, PNorm = 62.4060, GNorm = 1.5117, lr_0 = 2.1003e-04
Loss = 3.4366e-01, PNorm = 62.4096, GNorm = 1.6253, lr_0 = 2.0989e-04
Loss = 3.4399e-01, PNorm = 62.4127, GNorm = 1.4608, lr_0 = 2.0975e-04
Loss = 3.3360e-01, PNorm = 62.4138, GNorm = 1.2316, lr_0 = 2.0960e-04
Validation mae = 0.111393
Epoch 21
Loss = 3.3803e-01, PNorm = 62.4141, GNorm = 1.6516, lr_0 = 2.0946e-04
Loss = 3.7037e-01, PNorm = 62.4158, GNorm = 1.1770, lr_0 = 2.0932e-04
Loss = 2.8925e-01, PNorm = 62.4190, GNorm = 0.9281, lr_0 = 2.0917e-04
Loss = 3.1751e-01, PNorm = 62.4188, GNorm = 1.2171, lr_0 = 2.0903e-04
Loss = 4.2650e-01, PNorm = 62.4183, GNorm = 2.1650, lr_0 = 2.0889e-04
Loss = 3.8050e-01, PNorm = 62.4233, GNorm = 1.6202, lr_0 = 2.0874e-04
Loss = 3.3136e-01, PNorm = 62.4245, GNorm = 1.6218, lr_0 = 2.0860e-04
Loss = 3.7611e-01, PNorm = 62.4270, GNorm = 1.2413, lr_0 = 2.0846e-04
Loss = 3.7889e-01, PNorm = 62.4284, GNorm = 1.2644, lr_0 = 2.0831e-04
Loss = 3.5507e-01, PNorm = 62.4287, GNorm = 1.4493, lr_0 = 2.0817e-04
Loss = 3.7602e-01, PNorm = 62.4309, GNorm = 1.4016, lr_0 = 2.0803e-04
Loss = 3.3162e-01, PNorm = 62.4330, GNorm = 1.2732, lr_0 = 2.0789e-04
Loss = 3.5556e-01, PNorm = 62.4348, GNorm = 1.9296, lr_0 = 2.0774e-04
Loss = 3.4604e-01, PNorm = 62.4405, GNorm = 1.2008, lr_0 = 2.0760e-04
Loss = 3.2163e-01, PNorm = 62.4441, GNorm = 1.1438, lr_0 = 2.0746e-04
Loss = 3.8385e-01, PNorm = 62.4477, GNorm = 1.9803, lr_0 = 2.0732e-04
Loss = 4.2268e-01, PNorm = 62.4483, GNorm = 2.2466, lr_0 = 2.0718e-04
Loss = 3.7411e-01, PNorm = 62.4536, GNorm = 1.4055, lr_0 = 2.0703e-04
Loss = 3.9959e-01, PNorm = 62.4566, GNorm = 1.1842, lr_0 = 2.0689e-04
Loss = 3.3757e-01, PNorm = 62.4595, GNorm = 1.5940, lr_0 = 2.0675e-04
Loss = 3.4381e-01, PNorm = 62.4594, GNorm = 1.5455, lr_0 = 2.0661e-04
Loss = 3.4704e-01, PNorm = 62.4602, GNorm = 1.0666, lr_0 = 2.0647e-04
Loss = 3.7077e-01, PNorm = 62.4622, GNorm = 1.1935, lr_0 = 2.0633e-04
Loss = 3.4767e-01, PNorm = 62.4655, GNorm = 1.8331, lr_0 = 2.0618e-04
Loss = 3.0534e-01, PNorm = 62.4667, GNorm = 1.4224, lr_0 = 2.0604e-04
Loss = 3.2174e-01, PNorm = 62.4706, GNorm = 1.9751, lr_0 = 2.0590e-04
Loss = 4.0583e-01, PNorm = 62.4730, GNorm = 1.4336, lr_0 = 2.0576e-04
Loss = 3.8731e-01, PNorm = 62.4720, GNorm = 1.6008, lr_0 = 2.0562e-04
Loss = 3.9573e-01, PNorm = 62.4732, GNorm = 1.4065, lr_0 = 2.0548e-04
Loss = 3.2561e-01, PNorm = 62.4755, GNorm = 1.4131, lr_0 = 2.0534e-04
Loss = 3.6090e-01, PNorm = 62.4783, GNorm = 1.4822, lr_0 = 2.0520e-04
Loss = 3.7505e-01, PNorm = 62.4799, GNorm = 1.2792, lr_0 = 2.0506e-04
Loss = 3.5126e-01, PNorm = 62.4819, GNorm = 1.7071, lr_0 = 2.0492e-04
Loss = 3.5718e-01, PNorm = 62.4837, GNorm = 1.8019, lr_0 = 2.0478e-04
Loss = 3.7095e-01, PNorm = 62.4816, GNorm = 1.3757, lr_0 = 2.0464e-04
Loss = 3.4661e-01, PNorm = 62.4824, GNorm = 1.4305, lr_0 = 2.0450e-04
Loss = 3.3566e-01, PNorm = 62.4859, GNorm = 1.1578, lr_0 = 2.0436e-04
Loss = 3.7139e-01, PNorm = 62.4876, GNorm = 1.2399, lr_0 = 2.0422e-04
Loss = 4.3921e-01, PNorm = 62.4912, GNorm = 1.8145, lr_0 = 2.0408e-04
Loss = 4.0235e-01, PNorm = 62.4935, GNorm = 2.2926, lr_0 = 2.0394e-04
Loss = 4.1937e-01, PNorm = 62.4954, GNorm = 1.8709, lr_0 = 2.0380e-04
Loss = 3.2718e-01, PNorm = 62.4972, GNorm = 1.3415, lr_0 = 2.0366e-04
Loss = 3.6821e-01, PNorm = 62.4997, GNorm = 1.0684, lr_0 = 2.0352e-04
Loss = 3.9886e-01, PNorm = 62.5028, GNorm = 1.5350, lr_0 = 2.0338e-04
Loss = 3.4298e-01, PNorm = 62.5014, GNorm = 1.2230, lr_0 = 2.0324e-04
Loss = 4.1635e-01, PNorm = 62.5017, GNorm = 1.4647, lr_0 = 2.0310e-04
Loss = 3.8370e-01, PNorm = 62.5047, GNorm = 1.4045, lr_0 = 2.0296e-04
Loss = 3.6371e-01, PNorm = 62.5081, GNorm = 2.1505, lr_0 = 2.0282e-04
Loss = 3.3632e-01, PNorm = 62.5099, GNorm = 1.6871, lr_0 = 2.0268e-04
Loss = 3.1017e-01, PNorm = 62.5103, GNorm = 1.9457, lr_0 = 2.0254e-04
Loss = 3.4338e-01, PNorm = 62.5127, GNorm = 1.5168, lr_0 = 2.0240e-04
Loss = 3.5417e-01, PNorm = 62.5160, GNorm = 1.2569, lr_0 = 2.0227e-04
Loss = 3.3405e-01, PNorm = 62.5184, GNorm = 1.6484, lr_0 = 2.0213e-04
Loss = 3.4970e-01, PNorm = 62.5207, GNorm = 1.8391, lr_0 = 2.0199e-04
Loss = 3.7748e-01, PNorm = 62.5201, GNorm = 1.2550, lr_0 = 2.0185e-04
Loss = 3.9778e-01, PNorm = 62.5214, GNorm = 2.0386, lr_0 = 2.0171e-04
Loss = 3.7453e-01, PNorm = 62.5197, GNorm = 1.2245, lr_0 = 2.0157e-04
Loss = 3.9907e-01, PNorm = 62.5217, GNorm = 2.0624, lr_0 = 2.0144e-04
Loss = 3.1230e-01, PNorm = 62.5267, GNorm = 1.2297, lr_0 = 2.0130e-04
Loss = 3.5808e-01, PNorm = 62.5286, GNorm = 1.6967, lr_0 = 2.0116e-04
Loss = 3.7586e-01, PNorm = 62.5306, GNorm = 1.4807, lr_0 = 2.0102e-04
Loss = 3.6959e-01, PNorm = 62.5358, GNorm = 2.1081, lr_0 = 2.0088e-04
Loss = 4.0671e-01, PNorm = 62.5346, GNorm = 1.5497, lr_0 = 2.0075e-04
Loss = 3.4635e-01, PNorm = 62.5386, GNorm = 1.2353, lr_0 = 2.0061e-04
Loss = 3.3509e-01, PNorm = 62.5442, GNorm = 1.5252, lr_0 = 2.0047e-04
Loss = 3.5060e-01, PNorm = 62.5464, GNorm = 1.7253, lr_0 = 2.0033e-04
Loss = 3.9778e-01, PNorm = 62.5498, GNorm = 1.5064, lr_0 = 2.0020e-04
Loss = 3.6187e-01, PNorm = 62.5533, GNorm = 2.0009, lr_0 = 2.0006e-04
Loss = 3.0310e-01, PNorm = 62.5535, GNorm = 0.8760, lr_0 = 1.9992e-04
Loss = 3.5664e-01, PNorm = 62.5576, GNorm = 1.4372, lr_0 = 1.9979e-04
Loss = 3.4741e-01, PNorm = 62.5577, GNorm = 1.3877, lr_0 = 1.9965e-04
Loss = 3.7675e-01, PNorm = 62.5585, GNorm = 1.2344, lr_0 = 1.9951e-04
Loss = 4.0930e-01, PNorm = 62.5611, GNorm = 2.5936, lr_0 = 1.9938e-04
Loss = 3.6083e-01, PNorm = 62.5642, GNorm = 1.4208, lr_0 = 1.9924e-04
Loss = 3.4046e-01, PNorm = 62.5633, GNorm = 1.2223, lr_0 = 1.9910e-04
Loss = 3.7814e-01, PNorm = 62.5645, GNorm = 2.1826, lr_0 = 1.9897e-04
Loss = 3.3379e-01, PNorm = 62.5682, GNorm = 1.3171, lr_0 = 1.9883e-04
Loss = 3.6663e-01, PNorm = 62.5721, GNorm = 1.4021, lr_0 = 1.9869e-04
Loss = 3.4586e-01, PNorm = 62.5750, GNorm = 1.0340, lr_0 = 1.9856e-04
Loss = 3.6384e-01, PNorm = 62.5784, GNorm = 1.5983, lr_0 = 1.9842e-04
Loss = 4.6599e-01, PNorm = 62.5795, GNorm = 1.5014, lr_0 = 1.9829e-04
Loss = 3.4330e-01, PNorm = 62.5815, GNorm = 1.3214, lr_0 = 1.9815e-04
Loss = 3.5538e-01, PNorm = 62.5826, GNorm = 1.3988, lr_0 = 1.9801e-04
Loss = 3.2978e-01, PNorm = 62.5844, GNorm = 1.3124, lr_0 = 1.9788e-04
Loss = 3.2635e-01, PNorm = 62.5887, GNorm = 0.8684, lr_0 = 1.9774e-04
Loss = 3.8963e-01, PNorm = 62.5901, GNorm = 1.7927, lr_0 = 1.9761e-04
Loss = 3.3519e-01, PNorm = 62.5909, GNorm = 1.6339, lr_0 = 1.9747e-04
Loss = 3.3242e-01, PNorm = 62.5916, GNorm = 1.0380, lr_0 = 1.9734e-04
Loss = 3.4233e-01, PNorm = 62.5928, GNorm = 1.3026, lr_0 = 1.9720e-04
Loss = 3.8600e-01, PNorm = 62.5940, GNorm = 1.3572, lr_0 = 1.9707e-04
Loss = 3.4048e-01, PNorm = 62.5955, GNorm = 1.4585, lr_0 = 1.9693e-04
Loss = 3.0588e-01, PNorm = 62.5975, GNorm = 1.0623, lr_0 = 1.9680e-04
Loss = 3.8303e-01, PNorm = 62.6001, GNorm = 1.4864, lr_0 = 1.9666e-04
Loss = 3.4581e-01, PNorm = 62.6042, GNorm = 1.2783, lr_0 = 1.9653e-04
Loss = 3.6335e-01, PNorm = 62.6065, GNorm = 1.3796, lr_0 = 1.9639e-04
Loss = 3.4589e-01, PNorm = 62.6086, GNorm = 1.3104, lr_0 = 1.9626e-04
Loss = 3.8138e-01, PNorm = 62.6087, GNorm = 1.6689, lr_0 = 1.9612e-04
Loss = 3.5112e-01, PNorm = 62.6094, GNorm = 1.6244, lr_0 = 1.9599e-04
Loss = 3.7288e-01, PNorm = 62.6123, GNorm = 1.4296, lr_0 = 1.9585e-04
Loss = 3.8074e-01, PNorm = 62.6109, GNorm = 1.8338, lr_0 = 1.9572e-04
Loss = 3.7387e-01, PNorm = 62.6116, GNorm = 1.5531, lr_0 = 1.9559e-04
Loss = 3.6326e-01, PNorm = 62.6138, GNorm = 1.6013, lr_0 = 1.9545e-04
Loss = 3.9776e-01, PNorm = 62.6149, GNorm = 1.4396, lr_0 = 1.9532e-04
Loss = 4.0318e-01, PNorm = 62.6169, GNorm = 1.5229, lr_0 = 1.9518e-04
Loss = 3.3846e-01, PNorm = 62.6184, GNorm = 1.3148, lr_0 = 1.9505e-04
Loss = 3.5057e-01, PNorm = 62.6190, GNorm = 1.7773, lr_0 = 1.9492e-04
Loss = 3.4053e-01, PNorm = 62.6188, GNorm = 1.4714, lr_0 = 1.9478e-04
Loss = 3.6192e-01, PNorm = 62.6209, GNorm = 1.7776, lr_0 = 1.9465e-04
Loss = 3.5079e-01, PNorm = 62.6251, GNorm = 1.8226, lr_0 = 1.9452e-04
Loss = 3.3869e-01, PNorm = 62.6286, GNorm = 1.5561, lr_0 = 1.9438e-04
Loss = 3.9506e-01, PNorm = 62.6312, GNorm = 1.4739, lr_0 = 1.9425e-04
Loss = 3.7592e-01, PNorm = 62.6319, GNorm = 1.5997, lr_0 = 1.9412e-04
Loss = 3.1703e-01, PNorm = 62.6348, GNorm = 1.4047, lr_0 = 1.9398e-04
Loss = 3.8225e-01, PNorm = 62.6394, GNorm = 1.4436, lr_0 = 1.9385e-04
Loss = 3.5432e-01, PNorm = 62.6411, GNorm = 1.3103, lr_0 = 1.9372e-04
Loss = 3.4938e-01, PNorm = 62.6420, GNorm = 1.1573, lr_0 = 1.9359e-04
Loss = 3.3114e-01, PNorm = 62.6422, GNorm = 1.2384, lr_0 = 1.9345e-04
Loss = 4.4730e-01, PNorm = 62.6465, GNorm = 1.5319, lr_0 = 1.9332e-04
Loss = 3.5174e-01, PNorm = 62.6506, GNorm = 1.5903, lr_0 = 1.9319e-04
Loss = 3.2889e-01, PNorm = 62.6534, GNorm = 1.4066, lr_0 = 1.9306e-04
Validation mae = 0.111592
Epoch 22
Loss = 3.4305e-01, PNorm = 62.6544, GNorm = 1.6679, lr_0 = 1.9292e-04
Loss = 3.6840e-01, PNorm = 62.6568, GNorm = 1.2105, lr_0 = 1.9279e-04
Loss = 3.4077e-01, PNorm = 62.6578, GNorm = 1.5300, lr_0 = 1.9266e-04
Loss = 3.5398e-01, PNorm = 62.6600, GNorm = 1.4829, lr_0 = 1.9253e-04
Loss = 3.3359e-01, PNorm = 62.6618, GNorm = 1.0438, lr_0 = 1.9240e-04
Loss = 4.0390e-01, PNorm = 62.6640, GNorm = 1.6305, lr_0 = 1.9226e-04
Loss = 3.6903e-01, PNorm = 62.6663, GNorm = 1.7238, lr_0 = 1.9213e-04
Loss = 3.4655e-01, PNorm = 62.6680, GNorm = 1.6850, lr_0 = 1.9200e-04
Loss = 3.3339e-01, PNorm = 62.6705, GNorm = 1.4508, lr_0 = 1.9187e-04
Loss = 3.6199e-01, PNorm = 62.6730, GNorm = 1.8175, lr_0 = 1.9174e-04
Loss = 3.8013e-01, PNorm = 62.6743, GNorm = 1.6277, lr_0 = 1.9161e-04
Loss = 3.7480e-01, PNorm = 62.6768, GNorm = 1.5287, lr_0 = 1.9148e-04
Loss = 3.4348e-01, PNorm = 62.6778, GNorm = 1.4859, lr_0 = 1.9134e-04
Loss = 3.6481e-01, PNorm = 62.6799, GNorm = 1.5114, lr_0 = 1.9121e-04
Loss = 4.0183e-01, PNorm = 62.6839, GNorm = 1.6862, lr_0 = 1.9108e-04
Loss = 3.5483e-01, PNorm = 62.6849, GNorm = 1.8026, lr_0 = 1.9095e-04
Loss = 3.2644e-01, PNorm = 62.6862, GNorm = 1.4792, lr_0 = 1.9082e-04
Loss = 3.2821e-01, PNorm = 62.6905, GNorm = 1.4588, lr_0 = 1.9069e-04
Loss = 3.9164e-01, PNorm = 62.6937, GNorm = 1.3494, lr_0 = 1.9056e-04
Loss = 3.6349e-01, PNorm = 62.6971, GNorm = 2.0363, lr_0 = 1.9043e-04
Loss = 3.0876e-01, PNorm = 62.6969, GNorm = 1.1186, lr_0 = 1.9030e-04
Loss = 3.1750e-01, PNorm = 62.6976, GNorm = 2.1894, lr_0 = 1.9017e-04
Loss = 3.5839e-01, PNorm = 62.6983, GNorm = 1.2776, lr_0 = 1.9004e-04
Loss = 3.4918e-01, PNorm = 62.7016, GNorm = 1.3432, lr_0 = 1.8991e-04
Loss = 3.2260e-01, PNorm = 62.7019, GNorm = 1.6453, lr_0 = 1.8978e-04
Loss = 3.5193e-01, PNorm = 62.7026, GNorm = 2.0452, lr_0 = 1.8965e-04
Loss = 3.5154e-01, PNorm = 62.7068, GNorm = 1.3181, lr_0 = 1.8952e-04
Loss = 3.1628e-01, PNorm = 62.7120, GNorm = 1.2880, lr_0 = 1.8939e-04
Loss = 3.4495e-01, PNorm = 62.7149, GNorm = 1.9809, lr_0 = 1.8926e-04
Loss = 3.5548e-01, PNorm = 62.7163, GNorm = 1.5115, lr_0 = 1.8913e-04
Loss = 3.6521e-01, PNorm = 62.7207, GNorm = 1.7806, lr_0 = 1.8900e-04
Loss = 4.1345e-01, PNorm = 62.7243, GNorm = 2.1908, lr_0 = 1.8887e-04
Loss = 3.3803e-01, PNorm = 62.7281, GNorm = 1.1799, lr_0 = 1.8874e-04
Loss = 3.3190e-01, PNorm = 62.7308, GNorm = 1.4775, lr_0 = 1.8861e-04
Loss = 4.2460e-01, PNorm = 62.7320, GNorm = 1.7017, lr_0 = 1.8848e-04
Loss = 3.4465e-01, PNorm = 62.7362, GNorm = 1.3893, lr_0 = 1.8835e-04
Loss = 4.1773e-01, PNorm = 62.7367, GNorm = 1.8489, lr_0 = 1.8822e-04
Loss = 3.4402e-01, PNorm = 62.7346, GNorm = 1.4356, lr_0 = 1.8809e-04
Loss = 3.5891e-01, PNorm = 62.7368, GNorm = 1.7732, lr_0 = 1.8797e-04
Loss = 3.4297e-01, PNorm = 62.7385, GNorm = 1.7824, lr_0 = 1.8784e-04
Loss = 3.6603e-01, PNorm = 62.7392, GNorm = 2.6079, lr_0 = 1.8771e-04
Loss = 3.8534e-01, PNorm = 62.7407, GNorm = 1.5976, lr_0 = 1.8758e-04
Loss = 3.0391e-01, PNorm = 62.7417, GNorm = 1.5587, lr_0 = 1.8745e-04
Loss = 3.5671e-01, PNorm = 62.7435, GNorm = 1.6200, lr_0 = 1.8732e-04
Loss = 3.9419e-01, PNorm = 62.7456, GNorm = 1.2100, lr_0 = 1.8719e-04
Loss = 3.3929e-01, PNorm = 62.7493, GNorm = 1.1558, lr_0 = 1.8707e-04
Loss = 3.4011e-01, PNorm = 62.7506, GNorm = 1.2370, lr_0 = 1.8694e-04
Loss = 3.6357e-01, PNorm = 62.7529, GNorm = 1.4661, lr_0 = 1.8681e-04
Loss = 3.4950e-01, PNorm = 62.7538, GNorm = 1.2754, lr_0 = 1.8668e-04
Loss = 2.8195e-01, PNorm = 62.7573, GNorm = 1.5817, lr_0 = 1.8655e-04
Loss = 3.6828e-01, PNorm = 62.7574, GNorm = 1.5607, lr_0 = 1.8643e-04
Loss = 3.5434e-01, PNorm = 62.7587, GNorm = 1.1276, lr_0 = 1.8630e-04
Loss = 3.5021e-01, PNorm = 62.7613, GNorm = 1.3538, lr_0 = 1.8617e-04
Loss = 3.4259e-01, PNorm = 62.7637, GNorm = 1.1138, lr_0 = 1.8604e-04
Loss = 3.4935e-01, PNorm = 62.7657, GNorm = 1.5678, lr_0 = 1.8592e-04
Loss = 3.8504e-01, PNorm = 62.7644, GNorm = 1.9740, lr_0 = 1.8579e-04
Loss = 4.0467e-01, PNorm = 62.7681, GNorm = 1.8705, lr_0 = 1.8566e-04
Loss = 3.6699e-01, PNorm = 62.7704, GNorm = 1.3916, lr_0 = 1.8553e-04
Loss = 3.6408e-01, PNorm = 62.7722, GNorm = 2.5209, lr_0 = 1.8541e-04
Loss = 3.5028e-01, PNorm = 62.7750, GNorm = 1.7378, lr_0 = 1.8528e-04
Loss = 3.3116e-01, PNorm = 62.7765, GNorm = 1.3310, lr_0 = 1.8515e-04
Loss = 3.6437e-01, PNorm = 62.7781, GNorm = 1.4548, lr_0 = 1.8503e-04
Loss = 3.3379e-01, PNorm = 62.7812, GNorm = 1.0227, lr_0 = 1.8490e-04
Loss = 3.8467e-01, PNorm = 62.7811, GNorm = 1.2613, lr_0 = 1.8477e-04
Loss = 3.7059e-01, PNorm = 62.7804, GNorm = 1.5283, lr_0 = 1.8465e-04
Loss = 3.5319e-01, PNorm = 62.7838, GNorm = 1.2643, lr_0 = 1.8452e-04
Loss = 3.5549e-01, PNorm = 62.7860, GNorm = 1.4297, lr_0 = 1.8439e-04
Loss = 3.2948e-01, PNorm = 62.7871, GNorm = 1.3726, lr_0 = 1.8427e-04
Loss = 3.2880e-01, PNorm = 62.7907, GNorm = 1.2991, lr_0 = 1.8414e-04
Loss = 3.7288e-01, PNorm = 62.7890, GNorm = 1.3321, lr_0 = 1.8401e-04
Loss = 3.3642e-01, PNorm = 62.7903, GNorm = 1.0150, lr_0 = 1.8389e-04
Loss = 4.3155e-01, PNorm = 62.7944, GNorm = 1.9884, lr_0 = 1.8376e-04
Loss = 3.6900e-01, PNorm = 62.7984, GNorm = 1.5827, lr_0 = 1.8364e-04
Loss = 3.5504e-01, PNorm = 62.8003, GNorm = 1.3870, lr_0 = 1.8351e-04
Loss = 3.9202e-01, PNorm = 62.8016, GNorm = 1.5523, lr_0 = 1.8338e-04
Loss = 3.9070e-01, PNorm = 62.8031, GNorm = 1.3578, lr_0 = 1.8326e-04
Loss = 3.2485e-01, PNorm = 62.8039, GNorm = 1.3687, lr_0 = 1.8313e-04
Loss = 3.2688e-01, PNorm = 62.8054, GNorm = 2.2528, lr_0 = 1.8301e-04
Loss = 3.6610e-01, PNorm = 62.8072, GNorm = 0.9937, lr_0 = 1.8288e-04
Loss = 3.8359e-01, PNorm = 62.8073, GNorm = 2.0286, lr_0 = 1.8276e-04
Loss = 3.4785e-01, PNorm = 62.8104, GNorm = 2.0516, lr_0 = 1.8263e-04
Loss = 3.3158e-01, PNorm = 62.8116, GNorm = 1.4888, lr_0 = 1.8251e-04
Loss = 3.6603e-01, PNorm = 62.8128, GNorm = 1.1445, lr_0 = 1.8238e-04
Loss = 3.4962e-01, PNorm = 62.8151, GNorm = 1.7724, lr_0 = 1.8226e-04
Loss = 3.4006e-01, PNorm = 62.8140, GNorm = 1.5085, lr_0 = 1.8213e-04
Loss = 3.8829e-01, PNorm = 62.8153, GNorm = 1.3312, lr_0 = 1.8201e-04
Loss = 3.4677e-01, PNorm = 62.8175, GNorm = 1.4846, lr_0 = 1.8188e-04
Loss = 4.2057e-01, PNorm = 62.8205, GNorm = 1.9186, lr_0 = 1.8176e-04
Loss = 4.0891e-01, PNorm = 62.8223, GNorm = 1.4470, lr_0 = 1.8163e-04
Loss = 3.4970e-01, PNorm = 62.8241, GNorm = 2.5199, lr_0 = 1.8151e-04
Loss = 3.4649e-01, PNorm = 62.8264, GNorm = 1.3676, lr_0 = 1.8138e-04
Loss = 3.5833e-01, PNorm = 62.8292, GNorm = 1.6301, lr_0 = 1.8126e-04
Loss = 3.4486e-01, PNorm = 62.8312, GNorm = 1.1225, lr_0 = 1.8114e-04
Loss = 3.4757e-01, PNorm = 62.8312, GNorm = 1.2049, lr_0 = 1.8101e-04
Loss = 3.1485e-01, PNorm = 62.8344, GNorm = 1.2391, lr_0 = 1.8089e-04
Loss = 3.5478e-01, PNorm = 62.8357, GNorm = 1.1458, lr_0 = 1.8076e-04
Loss = 3.5525e-01, PNorm = 62.8355, GNorm = 1.2670, lr_0 = 1.8064e-04
Loss = 3.2644e-01, PNorm = 62.8385, GNorm = 1.4492, lr_0 = 1.8052e-04
Loss = 3.2033e-01, PNorm = 62.8419, GNorm = 1.6030, lr_0 = 1.8039e-04
Loss = 3.4431e-01, PNorm = 62.8422, GNorm = 1.8587, lr_0 = 1.8027e-04
Loss = 3.1961e-01, PNorm = 62.8427, GNorm = 2.5473, lr_0 = 1.8015e-04
Loss = 4.4159e-01, PNorm = 62.8460, GNorm = 1.6728, lr_0 = 1.8002e-04
Loss = 3.5434e-01, PNorm = 62.8468, GNorm = 1.0841, lr_0 = 1.7990e-04
Loss = 3.9977e-01, PNorm = 62.8475, GNorm = 1.1349, lr_0 = 1.7978e-04
Loss = 3.3688e-01, PNorm = 62.8515, GNorm = 1.1971, lr_0 = 1.7965e-04
Loss = 3.5095e-01, PNorm = 62.8537, GNorm = 1.1046, lr_0 = 1.7953e-04
Loss = 4.0433e-01, PNorm = 62.8557, GNorm = 1.3027, lr_0 = 1.7941e-04
Loss = 3.4255e-01, PNorm = 62.8589, GNorm = 1.2700, lr_0 = 1.7928e-04
Loss = 3.8797e-01, PNorm = 62.8617, GNorm = 1.4044, lr_0 = 1.7916e-04
Loss = 3.8221e-01, PNorm = 62.8679, GNorm = 1.4997, lr_0 = 1.7904e-04
Loss = 3.2970e-01, PNorm = 62.8711, GNorm = 1.4464, lr_0 = 1.7892e-04
Loss = 3.9924e-01, PNorm = 62.8713, GNorm = 1.9453, lr_0 = 1.7879e-04
Loss = 4.4746e-01, PNorm = 62.8737, GNorm = 1.3153, lr_0 = 1.7867e-04
Loss = 3.2329e-01, PNorm = 62.8749, GNorm = 1.2725, lr_0 = 1.7855e-04
Loss = 3.5240e-01, PNorm = 62.8776, GNorm = 1.0701, lr_0 = 1.7843e-04
Loss = 3.8782e-01, PNorm = 62.8812, GNorm = 1.1509, lr_0 = 1.7830e-04
Loss = 3.7237e-01, PNorm = 62.8833, GNorm = 2.7261, lr_0 = 1.7818e-04
Loss = 3.1645e-01, PNorm = 62.8829, GNorm = 1.8423, lr_0 = 1.7806e-04
Loss = 3.5247e-01, PNorm = 62.8858, GNorm = 1.1106, lr_0 = 1.7794e-04
Loss = 3.8878e-01, PNorm = 62.8883, GNorm = 1.6445, lr_0 = 1.7782e-04
Validation mae = 0.111245
Epoch 23
Loss = 3.3835e-01, PNorm = 62.8906, GNorm = 2.4572, lr_0 = 1.7769e-04
Loss = 3.2427e-01, PNorm = 62.8900, GNorm = 1.8874, lr_0 = 1.7757e-04
Loss = 3.3470e-01, PNorm = 62.8919, GNorm = 1.3983, lr_0 = 1.7745e-04
Loss = 3.5020e-01, PNorm = 62.8927, GNorm = 1.8127, lr_0 = 1.7733e-04
Loss = 3.1010e-01, PNorm = 62.8943, GNorm = 1.1743, lr_0 = 1.7721e-04
Loss = 3.6504e-01, PNorm = 62.8959, GNorm = 1.3194, lr_0 = 1.7709e-04
Loss = 3.3330e-01, PNorm = 62.8963, GNorm = 1.8136, lr_0 = 1.7696e-04
Loss = 3.6696e-01, PNorm = 62.8981, GNorm = 1.6149, lr_0 = 1.7684e-04
Loss = 3.4941e-01, PNorm = 62.9009, GNorm = 1.6171, lr_0 = 1.7672e-04
Loss = 3.5719e-01, PNorm = 62.9009, GNorm = 1.8101, lr_0 = 1.7660e-04
Loss = 3.3786e-01, PNorm = 62.9032, GNorm = 2.1284, lr_0 = 1.7648e-04
Loss = 3.2216e-01, PNorm = 62.9045, GNorm = 1.4181, lr_0 = 1.7636e-04
Loss = 3.2190e-01, PNorm = 62.9047, GNorm = 1.4635, lr_0 = 1.7624e-04
Loss = 3.0293e-01, PNorm = 62.9064, GNorm = 1.2142, lr_0 = 1.7612e-04
Loss = 3.7739e-01, PNorm = 62.9099, GNorm = 1.5487, lr_0 = 1.7600e-04
Loss = 3.6285e-01, PNorm = 62.9089, GNorm = 1.7100, lr_0 = 1.7588e-04
Loss = 3.5745e-01, PNorm = 62.9102, GNorm = 1.4587, lr_0 = 1.7576e-04
Loss = 3.7884e-01, PNorm = 62.9106, GNorm = 1.5352, lr_0 = 1.7564e-04
Loss = 3.4908e-01, PNorm = 62.9111, GNorm = 1.1106, lr_0 = 1.7552e-04
Loss = 3.9103e-01, PNorm = 62.9144, GNorm = 1.2495, lr_0 = 1.7540e-04
Loss = 3.2001e-01, PNorm = 62.9188, GNorm = 1.3391, lr_0 = 1.7528e-04
Loss = 3.1652e-01, PNorm = 62.9216, GNorm = 1.4246, lr_0 = 1.7516e-04
Loss = 3.7280e-01, PNorm = 62.9228, GNorm = 1.0648, lr_0 = 1.7504e-04
Loss = 3.5132e-01, PNorm = 62.9243, GNorm = 1.4244, lr_0 = 1.7492e-04
Loss = 4.0132e-01, PNorm = 62.9262, GNorm = 2.0808, lr_0 = 1.7480e-04
Loss = 4.0073e-01, PNorm = 62.9294, GNorm = 2.2145, lr_0 = 1.7468e-04
Loss = 3.4292e-01, PNorm = 62.9304, GNorm = 1.1982, lr_0 = 1.7456e-04
Loss = 3.6208e-01, PNorm = 62.9298, GNorm = 1.5444, lr_0 = 1.7444e-04
Loss = 3.2702e-01, PNorm = 62.9301, GNorm = 1.3691, lr_0 = 1.7432e-04
Loss = 3.1879e-01, PNorm = 62.9329, GNorm = 1.7857, lr_0 = 1.7420e-04
Loss = 4.1380e-01, PNorm = 62.9331, GNorm = 1.2355, lr_0 = 1.7408e-04
Loss = 3.5238e-01, PNorm = 62.9331, GNorm = 0.9179, lr_0 = 1.7396e-04
Loss = 3.5898e-01, PNorm = 62.9340, GNorm = 1.5560, lr_0 = 1.7384e-04
Loss = 3.8001e-01, PNorm = 62.9356, GNorm = 1.4247, lr_0 = 1.7372e-04
Loss = 3.5415e-01, PNorm = 62.9394, GNorm = 2.1013, lr_0 = 1.7360e-04
Loss = 3.7776e-01, PNorm = 62.9423, GNorm = 1.5724, lr_0 = 1.7348e-04
Loss = 3.2096e-01, PNorm = 62.9419, GNorm = 1.1937, lr_0 = 1.7336e-04
Loss = 3.2889e-01, PNorm = 62.9440, GNorm = 1.0890, lr_0 = 1.7325e-04
Loss = 3.3868e-01, PNorm = 62.9463, GNorm = 1.9336, lr_0 = 1.7313e-04
Loss = 3.3705e-01, PNorm = 62.9486, GNorm = 2.1096, lr_0 = 1.7301e-04
Loss = 3.1117e-01, PNorm = 62.9502, GNorm = 1.5093, lr_0 = 1.7289e-04
Loss = 3.1874e-01, PNorm = 62.9532, GNorm = 1.0775, lr_0 = 1.7277e-04
Loss = 3.4412e-01, PNorm = 62.9540, GNorm = 1.2224, lr_0 = 1.7265e-04
Loss = 3.2355e-01, PNorm = 62.9562, GNorm = 1.4915, lr_0 = 1.7253e-04
Loss = 3.9526e-01, PNorm = 62.9575, GNorm = 1.6656, lr_0 = 1.7242e-04
Loss = 3.2449e-01, PNorm = 62.9607, GNorm = 1.3871, lr_0 = 1.7230e-04
Loss = 3.2476e-01, PNorm = 62.9640, GNorm = 1.8445, lr_0 = 1.7218e-04
Loss = 3.2248e-01, PNorm = 62.9650, GNorm = 1.5806, lr_0 = 1.7206e-04
Loss = 3.5613e-01, PNorm = 62.9681, GNorm = 1.0935, lr_0 = 1.7194e-04
Loss = 3.7925e-01, PNorm = 62.9712, GNorm = 1.6335, lr_0 = 1.7183e-04
Loss = 3.8177e-01, PNorm = 62.9716, GNorm = 1.8912, lr_0 = 1.7171e-04
Loss = 3.4069e-01, PNorm = 62.9714, GNorm = 1.1887, lr_0 = 1.7159e-04
Loss = 3.5524e-01, PNorm = 62.9736, GNorm = 1.2887, lr_0 = 1.7147e-04
Loss = 3.9636e-01, PNorm = 62.9762, GNorm = 2.7148, lr_0 = 1.7136e-04
Loss = 3.9385e-01, PNorm = 62.9772, GNorm = 1.6543, lr_0 = 1.7124e-04
Loss = 3.6360e-01, PNorm = 62.9805, GNorm = 1.6981, lr_0 = 1.7112e-04
Loss = 3.6034e-01, PNorm = 62.9815, GNorm = 1.3658, lr_0 = 1.7100e-04
Loss = 3.8630e-01, PNorm = 62.9830, GNorm = 1.6478, lr_0 = 1.7089e-04
Loss = 3.5588e-01, PNorm = 62.9866, GNorm = 1.2661, lr_0 = 1.7077e-04
Loss = 3.4798e-01, PNorm = 62.9887, GNorm = 1.5905, lr_0 = 1.7065e-04
Loss = 3.0372e-01, PNorm = 62.9910, GNorm = 1.4608, lr_0 = 1.7054e-04
Loss = 3.4663e-01, PNorm = 62.9916, GNorm = 1.1453, lr_0 = 1.7042e-04
Loss = 3.9232e-01, PNorm = 62.9926, GNorm = 1.4591, lr_0 = 1.7030e-04
Loss = 3.2899e-01, PNorm = 62.9955, GNorm = 1.2316, lr_0 = 1.7019e-04
Loss = 3.5648e-01, PNorm = 62.9963, GNorm = 1.5690, lr_0 = 1.7007e-04
Loss = 3.7221e-01, PNorm = 62.9966, GNorm = 1.2292, lr_0 = 1.6995e-04
Loss = 4.0988e-01, PNorm = 63.0006, GNorm = 1.7715, lr_0 = 1.6984e-04
Loss = 4.1195e-01, PNorm = 63.0010, GNorm = 2.7577, lr_0 = 1.6972e-04
Loss = 2.9691e-01, PNorm = 63.0013, GNorm = 1.4866, lr_0 = 1.6960e-04
Loss = 3.4808e-01, PNorm = 63.0042, GNorm = 2.1624, lr_0 = 1.6949e-04
Loss = 3.6276e-01, PNorm = 63.0062, GNorm = 1.6251, lr_0 = 1.6937e-04
Loss = 2.9926e-01, PNorm = 63.0076, GNorm = 1.4816, lr_0 = 1.6926e-04
Loss = 3.4390e-01, PNorm = 63.0091, GNorm = 1.5788, lr_0 = 1.6914e-04
Loss = 3.5534e-01, PNorm = 63.0111, GNorm = 1.2869, lr_0 = 1.6902e-04
Loss = 3.3627e-01, PNorm = 63.0127, GNorm = 1.4354, lr_0 = 1.6891e-04
Loss = 3.6163e-01, PNorm = 63.0141, GNorm = 1.3249, lr_0 = 1.6879e-04
Loss = 3.9492e-01, PNorm = 63.0159, GNorm = 1.8250, lr_0 = 1.6868e-04
Loss = 3.6753e-01, PNorm = 63.0156, GNorm = 1.4985, lr_0 = 1.6856e-04
Loss = 3.9840e-01, PNorm = 63.0168, GNorm = 1.1190, lr_0 = 1.6845e-04
Loss = 3.2673e-01, PNorm = 63.0221, GNorm = 1.1344, lr_0 = 1.6833e-04
Loss = 3.2413e-01, PNorm = 63.0255, GNorm = 1.2062, lr_0 = 1.6821e-04
Loss = 3.5583e-01, PNorm = 63.0263, GNorm = 1.5084, lr_0 = 1.6810e-04
Loss = 3.8879e-01, PNorm = 63.0283, GNorm = 1.3336, lr_0 = 1.6798e-04
Loss = 3.6772e-01, PNorm = 63.0300, GNorm = 1.3445, lr_0 = 1.6787e-04
Loss = 3.3856e-01, PNorm = 63.0294, GNorm = 1.6210, lr_0 = 1.6775e-04
Loss = 3.9152e-01, PNorm = 63.0314, GNorm = 1.3007, lr_0 = 1.6764e-04
Loss = 3.3614e-01, PNorm = 63.0339, GNorm = 1.3900, lr_0 = 1.6752e-04
Loss = 4.0504e-01, PNorm = 63.0362, GNorm = 1.9164, lr_0 = 1.6741e-04
Loss = 3.5830e-01, PNorm = 63.0382, GNorm = 1.2102, lr_0 = 1.6729e-04
Loss = 4.1512e-01, PNorm = 63.0414, GNorm = 1.3912, lr_0 = 1.6718e-04
Loss = 3.5593e-01, PNorm = 63.0407, GNorm = 1.6549, lr_0 = 1.6707e-04
Loss = 3.5105e-01, PNorm = 63.0422, GNorm = 1.0648, lr_0 = 1.6695e-04
Loss = 3.8250e-01, PNorm = 63.0442, GNorm = 1.7574, lr_0 = 1.6684e-04
Loss = 3.2757e-01, PNorm = 63.0438, GNorm = 1.3541, lr_0 = 1.6672e-04
Loss = 3.7588e-01, PNorm = 63.0449, GNorm = 2.2722, lr_0 = 1.6661e-04
Loss = 3.2706e-01, PNorm = 63.0507, GNorm = 1.3028, lr_0 = 1.6649e-04
Loss = 4.3570e-01, PNorm = 63.0537, GNorm = 1.6886, lr_0 = 1.6638e-04
Loss = 3.2979e-01, PNorm = 63.0559, GNorm = 1.3960, lr_0 = 1.6627e-04
Loss = 3.7080e-01, PNorm = 63.0584, GNorm = 1.1214, lr_0 = 1.6615e-04
Loss = 3.2886e-01, PNorm = 63.0608, GNorm = 1.2825, lr_0 = 1.6604e-04
Loss = 3.5268e-01, PNorm = 63.0624, GNorm = 1.3471, lr_0 = 1.6592e-04
Loss = 3.7237e-01, PNorm = 63.0620, GNorm = 1.2528, lr_0 = 1.6581e-04
Loss = 3.6324e-01, PNorm = 63.0628, GNorm = 1.2897, lr_0 = 1.6570e-04
Loss = 3.5610e-01, PNorm = 63.0645, GNorm = 1.2345, lr_0 = 1.6558e-04
Loss = 3.1480e-01, PNorm = 63.0659, GNorm = 1.1580, lr_0 = 1.6547e-04
Loss = 3.4532e-01, PNorm = 63.0657, GNorm = 1.2338, lr_0 = 1.6536e-04
Loss = 4.4221e-01, PNorm = 63.0639, GNorm = 1.5840, lr_0 = 1.6524e-04
Loss = 3.4951e-01, PNorm = 63.0658, GNorm = 1.4656, lr_0 = 1.6513e-04
Loss = 3.8404e-01, PNorm = 63.0705, GNorm = 2.0666, lr_0 = 1.6502e-04
Loss = 3.4084e-01, PNorm = 63.0720, GNorm = 1.6965, lr_0 = 1.6490e-04
Loss = 3.5074e-01, PNorm = 63.0747, GNorm = 1.6546, lr_0 = 1.6479e-04
Loss = 3.8234e-01, PNorm = 63.0760, GNorm = 1.6327, lr_0 = 1.6468e-04
Loss = 4.1266e-01, PNorm = 63.0763, GNorm = 1.3708, lr_0 = 1.6457e-04
Loss = 3.6850e-01, PNorm = 63.0774, GNorm = 1.4189, lr_0 = 1.6445e-04
Loss = 3.4995e-01, PNorm = 63.0790, GNorm = 1.1952, lr_0 = 1.6434e-04
Loss = 3.4023e-01, PNorm = 63.0816, GNorm = 1.1807, lr_0 = 1.6423e-04
Loss = 3.3807e-01, PNorm = 63.0818, GNorm = 1.6611, lr_0 = 1.6412e-04
Loss = 3.1450e-01, PNorm = 63.0819, GNorm = 1.6523, lr_0 = 1.6400e-04
Loss = 3.4408e-01, PNorm = 63.0830, GNorm = 1.7859, lr_0 = 1.6389e-04
Loss = 3.6340e-01, PNorm = 63.0855, GNorm = 1.5144, lr_0 = 1.6378e-04
Validation mae = 0.111808
Epoch 24
Loss = 3.3439e-01, PNorm = 63.0864, GNorm = 1.3954, lr_0 = 1.6367e-04
Loss = 3.8060e-01, PNorm = 63.0873, GNorm = 1.9172, lr_0 = 1.6355e-04
Loss = 3.2820e-01, PNorm = 63.0911, GNorm = 1.6217, lr_0 = 1.6344e-04
Loss = 3.8331e-01, PNorm = 63.0941, GNorm = 1.2248, lr_0 = 1.6333e-04
Loss = 3.6044e-01, PNorm = 63.0973, GNorm = 1.3703, lr_0 = 1.6322e-04
Loss = 3.6789e-01, PNorm = 63.0979, GNorm = 1.5957, lr_0 = 1.6311e-04
Loss = 4.0063e-01, PNorm = 63.0966, GNorm = 1.3243, lr_0 = 1.6299e-04
Loss = 3.3781e-01, PNorm = 63.0977, GNorm = 1.5334, lr_0 = 1.6288e-04
Loss = 3.2968e-01, PNorm = 63.0987, GNorm = 1.2437, lr_0 = 1.6277e-04
Loss = 3.3281e-01, PNorm = 63.1001, GNorm = 1.3700, lr_0 = 1.6266e-04
Loss = 3.3053e-01, PNorm = 63.1017, GNorm = 1.7560, lr_0 = 1.6255e-04
Loss = 3.7251e-01, PNorm = 63.1020, GNorm = 1.2963, lr_0 = 1.6244e-04
Loss = 3.3987e-01, PNorm = 63.1040, GNorm = 1.9093, lr_0 = 1.6233e-04
Loss = 3.9872e-01, PNorm = 63.1071, GNorm = 1.9020, lr_0 = 1.6221e-04
Loss = 3.3788e-01, PNorm = 63.1087, GNorm = 1.1219, lr_0 = 1.6210e-04
Loss = 3.5201e-01, PNorm = 63.1088, GNorm = 1.2488, lr_0 = 1.6199e-04
Loss = 3.5227e-01, PNorm = 63.1119, GNorm = 1.4580, lr_0 = 1.6188e-04
Loss = 3.5335e-01, PNorm = 63.1129, GNorm = 1.8536, lr_0 = 1.6177e-04
Loss = 2.9108e-01, PNorm = 63.1136, GNorm = 1.1142, lr_0 = 1.6166e-04
Loss = 3.9872e-01, PNorm = 63.1165, GNorm = 1.2669, lr_0 = 1.6155e-04
Loss = 3.6981e-01, PNorm = 63.1182, GNorm = 1.2297, lr_0 = 1.6144e-04
Loss = 3.1556e-01, PNorm = 63.1209, GNorm = 1.4235, lr_0 = 1.6133e-04
Loss = 3.4034e-01, PNorm = 63.1239, GNorm = 1.5338, lr_0 = 1.6122e-04
Loss = 3.0473e-01, PNorm = 63.1261, GNorm = 1.1458, lr_0 = 1.6111e-04
Loss = 3.2494e-01, PNorm = 63.1281, GNorm = 1.5838, lr_0 = 1.6100e-04
Loss = 3.9182e-01, PNorm = 63.1301, GNorm = 1.9788, lr_0 = 1.6089e-04
Loss = 3.9171e-01, PNorm = 63.1333, GNorm = 1.1344, lr_0 = 1.6078e-04
Loss = 3.3325e-01, PNorm = 63.1335, GNorm = 1.7126, lr_0 = 1.6067e-04
Loss = 3.3643e-01, PNorm = 63.1326, GNorm = 1.5684, lr_0 = 1.6056e-04
Loss = 3.4608e-01, PNorm = 63.1350, GNorm = 1.2823, lr_0 = 1.6045e-04
Loss = 3.7865e-01, PNorm = 63.1380, GNorm = 1.6181, lr_0 = 1.6034e-04
Loss = 3.2422e-01, PNorm = 63.1382, GNorm = 1.4246, lr_0 = 1.6023e-04
Loss = 2.8808e-01, PNorm = 63.1389, GNorm = 1.0278, lr_0 = 1.6012e-04
Loss = 3.4739e-01, PNorm = 63.1402, GNorm = 1.7861, lr_0 = 1.6001e-04
Loss = 3.8805e-01, PNorm = 63.1413, GNorm = 1.8018, lr_0 = 1.5990e-04
Loss = 3.3498e-01, PNorm = 63.1398, GNorm = 1.3703, lr_0 = 1.5979e-04
Loss = 3.7225e-01, PNorm = 63.1396, GNorm = 1.2368, lr_0 = 1.5968e-04
Loss = 3.2387e-01, PNorm = 63.1419, GNorm = 1.2089, lr_0 = 1.5957e-04
Loss = 3.5111e-01, PNorm = 63.1444, GNorm = 1.6497, lr_0 = 1.5946e-04
Loss = 3.4570e-01, PNorm = 63.1456, GNorm = 1.3535, lr_0 = 1.5935e-04
Loss = 3.3966e-01, PNorm = 63.1484, GNorm = 1.2770, lr_0 = 1.5924e-04
Loss = 3.3567e-01, PNorm = 63.1498, GNorm = 1.2569, lr_0 = 1.5913e-04
Loss = 3.5897e-01, PNorm = 63.1507, GNorm = 1.5162, lr_0 = 1.5902e-04
Loss = 3.6693e-01, PNorm = 63.1512, GNorm = 1.2014, lr_0 = 1.5891e-04
Loss = 3.8848e-01, PNorm = 63.1523, GNorm = 1.2678, lr_0 = 1.5880e-04
Loss = 4.0167e-01, PNorm = 63.1559, GNorm = 1.2819, lr_0 = 1.5870e-04
Loss = 3.0308e-01, PNorm = 63.1594, GNorm = 1.1839, lr_0 = 1.5859e-04
Loss = 3.5293e-01, PNorm = 63.1602, GNorm = 1.8639, lr_0 = 1.5848e-04
Loss = 3.2675e-01, PNorm = 63.1619, GNorm = 1.3023, lr_0 = 1.5837e-04
Loss = 3.6358e-01, PNorm = 63.1619, GNorm = 1.4043, lr_0 = 1.5826e-04
Loss = 3.2836e-01, PNorm = 63.1631, GNorm = 1.1834, lr_0 = 1.5815e-04
Loss = 3.4088e-01, PNorm = 63.1655, GNorm = 1.0043, lr_0 = 1.5804e-04
Loss = 3.2504e-01, PNorm = 63.1670, GNorm = 2.1621, lr_0 = 1.5794e-04
Loss = 3.4093e-01, PNorm = 63.1679, GNorm = 1.2579, lr_0 = 1.5783e-04
Loss = 3.8866e-01, PNorm = 63.1679, GNorm = 1.7066, lr_0 = 1.5772e-04
Loss = 3.6279e-01, PNorm = 63.1692, GNorm = 1.5959, lr_0 = 1.5761e-04
Loss = 3.3461e-01, PNorm = 63.1712, GNorm = 1.1211, lr_0 = 1.5750e-04
Loss = 3.2144e-01, PNorm = 63.1718, GNorm = 1.3030, lr_0 = 1.5740e-04
Loss = 3.2109e-01, PNorm = 63.1723, GNorm = 1.0616, lr_0 = 1.5729e-04
Loss = 3.8669e-01, PNorm = 63.1732, GNorm = 1.4269, lr_0 = 1.5718e-04
Loss = 3.6083e-01, PNorm = 63.1761, GNorm = 1.5083, lr_0 = 1.5707e-04
Loss = 3.8625e-01, PNorm = 63.1781, GNorm = 1.9013, lr_0 = 1.5697e-04
Loss = 3.0377e-01, PNorm = 63.1808, GNorm = 2.0723, lr_0 = 1.5686e-04
Loss = 3.4250e-01, PNorm = 63.1828, GNorm = 1.0537, lr_0 = 1.5675e-04
Loss = 3.3457e-01, PNorm = 63.1835, GNorm = 1.2203, lr_0 = 1.5664e-04
Loss = 3.0403e-01, PNorm = 63.1854, GNorm = 1.1489, lr_0 = 1.5654e-04
Loss = 3.6223e-01, PNorm = 63.1860, GNorm = 1.5972, lr_0 = 1.5643e-04
Loss = 3.2471e-01, PNorm = 63.1871, GNorm = 1.5525, lr_0 = 1.5632e-04
Loss = 3.7263e-01, PNorm = 63.1895, GNorm = 0.7712, lr_0 = 1.5621e-04
Loss = 3.6526e-01, PNorm = 63.1918, GNorm = 1.2905, lr_0 = 1.5611e-04
Loss = 2.9035e-01, PNorm = 63.1936, GNorm = 1.3695, lr_0 = 1.5600e-04
Loss = 3.4673e-01, PNorm = 63.1963, GNorm = 1.6424, lr_0 = 1.5589e-04
Loss = 3.5769e-01, PNorm = 63.2004, GNorm = 1.5193, lr_0 = 1.5579e-04
Loss = 3.3695e-01, PNorm = 63.2021, GNorm = 1.5163, lr_0 = 1.5568e-04
Loss = 3.4719e-01, PNorm = 63.2027, GNorm = 1.5804, lr_0 = 1.5557e-04
Loss = 3.2415e-01, PNorm = 63.2038, GNorm = 1.3004, lr_0 = 1.5547e-04
Loss = 3.3401e-01, PNorm = 63.2059, GNorm = 1.4696, lr_0 = 1.5536e-04
Loss = 3.3995e-01, PNorm = 63.2067, GNorm = 1.2061, lr_0 = 1.5525e-04
Loss = 3.5933e-01, PNorm = 63.2069, GNorm = 1.3915, lr_0 = 1.5515e-04
Loss = 3.8016e-01, PNorm = 63.2077, GNorm = 1.9817, lr_0 = 1.5504e-04
Loss = 3.6639e-01, PNorm = 63.2086, GNorm = 1.4567, lr_0 = 1.5493e-04
Loss = 3.7702e-01, PNorm = 63.2094, GNorm = 1.7852, lr_0 = 1.5483e-04
Loss = 3.6309e-01, PNorm = 63.2097, GNorm = 1.6587, lr_0 = 1.5472e-04
Loss = 3.4727e-01, PNorm = 63.2107, GNorm = 1.4165, lr_0 = 1.5462e-04
Loss = 3.9511e-01, PNorm = 63.2134, GNorm = 1.2666, lr_0 = 1.5451e-04
Loss = 3.9383e-01, PNorm = 63.2140, GNorm = 1.5164, lr_0 = 1.5440e-04
Loss = 3.3286e-01, PNorm = 63.2162, GNorm = 1.1274, lr_0 = 1.5430e-04
Loss = 3.3904e-01, PNorm = 63.2176, GNorm = 1.5950, lr_0 = 1.5419e-04
Loss = 3.5481e-01, PNorm = 63.2188, GNorm = 2.3368, lr_0 = 1.5409e-04
Loss = 3.1910e-01, PNorm = 63.2216, GNorm = 1.5649, lr_0 = 1.5398e-04
Loss = 3.4174e-01, PNorm = 63.2244, GNorm = 1.1555, lr_0 = 1.5388e-04
Loss = 3.6174e-01, PNorm = 63.2266, GNorm = 1.6489, lr_0 = 1.5377e-04
Loss = 3.2394e-01, PNorm = 63.2269, GNorm = 1.1913, lr_0 = 1.5367e-04
Loss = 3.5405e-01, PNorm = 63.2261, GNorm = 2.2160, lr_0 = 1.5356e-04
Loss = 3.6321e-01, PNorm = 63.2267, GNorm = 1.9066, lr_0 = 1.5346e-04
Loss = 3.7846e-01, PNorm = 63.2302, GNorm = 1.3113, lr_0 = 1.5335e-04
Loss = 3.9075e-01, PNorm = 63.2337, GNorm = 1.5133, lr_0 = 1.5325e-04
Loss = 3.4343e-01, PNorm = 63.2366, GNorm = 1.1208, lr_0 = 1.5314e-04
Loss = 3.3302e-01, PNorm = 63.2365, GNorm = 1.4442, lr_0 = 1.5304e-04
Loss = 3.4879e-01, PNorm = 63.2379, GNorm = 1.0910, lr_0 = 1.5293e-04
Loss = 3.5194e-01, PNorm = 63.2414, GNorm = 1.0898, lr_0 = 1.5283e-04
Loss = 3.9571e-01, PNorm = 63.2439, GNorm = 2.1601, lr_0 = 1.5272e-04
Loss = 4.4486e-01, PNorm = 63.2467, GNorm = 1.4703, lr_0 = 1.5262e-04
Loss = 4.0649e-01, PNorm = 63.2491, GNorm = 1.3490, lr_0 = 1.5251e-04
Loss = 3.5433e-01, PNorm = 63.2509, GNorm = 1.4978, lr_0 = 1.5241e-04
Loss = 3.4818e-01, PNorm = 63.2521, GNorm = 1.1159, lr_0 = 1.5230e-04
Loss = 3.9226e-01, PNorm = 63.2513, GNorm = 1.4798, lr_0 = 1.5220e-04
Loss = 3.7062e-01, PNorm = 63.2523, GNorm = 1.4602, lr_0 = 1.5209e-04
Loss = 3.3558e-01, PNorm = 63.2531, GNorm = 1.7285, lr_0 = 1.5199e-04
Loss = 3.9609e-01, PNorm = 63.2527, GNorm = 1.4745, lr_0 = 1.5189e-04
Loss = 3.9701e-01, PNorm = 63.2537, GNorm = 2.6146, lr_0 = 1.5178e-04
Loss = 3.7583e-01, PNorm = 63.2550, GNorm = 1.7634, lr_0 = 1.5168e-04
Loss = 3.9471e-01, PNorm = 63.2582, GNorm = 1.3618, lr_0 = 1.5157e-04
Loss = 3.3551e-01, PNorm = 63.2594, GNorm = 2.2909, lr_0 = 1.5147e-04
Loss = 3.4283e-01, PNorm = 63.2603, GNorm = 1.2645, lr_0 = 1.5137e-04
Loss = 3.4306e-01, PNorm = 63.2606, GNorm = 1.5238, lr_0 = 1.5126e-04
Loss = 3.5553e-01, PNorm = 63.2624, GNorm = 2.0418, lr_0 = 1.5116e-04
Loss = 3.7915e-01, PNorm = 63.2647, GNorm = 1.8257, lr_0 = 1.5106e-04
Loss = 3.7873e-01, PNorm = 63.2667, GNorm = 1.7187, lr_0 = 1.5095e-04
Loss = 2.9997e-01, PNorm = 63.2675, GNorm = 1.5917, lr_0 = 1.5085e-04
Validation mae = 0.110806
Epoch 25
Loss = 3.2918e-01, PNorm = 63.2683, GNorm = 1.3127, lr_0 = 1.5075e-04
Loss = 3.7346e-01, PNorm = 63.2695, GNorm = 1.2619, lr_0 = 1.5064e-04
Loss = 3.9643e-01, PNorm = 63.2705, GNorm = 1.5090, lr_0 = 1.5054e-04
Loss = 4.1603e-01, PNorm = 63.2727, GNorm = 1.4298, lr_0 = 1.5044e-04
Loss = 3.3191e-01, PNorm = 63.2733, GNorm = 1.2663, lr_0 = 1.5033e-04
Loss = 3.4349e-01, PNorm = 63.2732, GNorm = 1.1568, lr_0 = 1.5023e-04
Loss = 3.8040e-01, PNorm = 63.2745, GNorm = 2.5213, lr_0 = 1.5013e-04
Loss = 3.4819e-01, PNorm = 63.2765, GNorm = 1.2056, lr_0 = 1.5002e-04
Loss = 3.6763e-01, PNorm = 63.2773, GNorm = 1.5458, lr_0 = 1.4992e-04
Loss = 3.7604e-01, PNorm = 63.2804, GNorm = 1.5696, lr_0 = 1.4982e-04
Loss = 3.3514e-01, PNorm = 63.2805, GNorm = 1.9082, lr_0 = 1.4972e-04
Loss = 3.5560e-01, PNorm = 63.2818, GNorm = 1.0620, lr_0 = 1.4961e-04
Loss = 3.5719e-01, PNorm = 63.2829, GNorm = 1.5811, lr_0 = 1.4951e-04
Loss = 3.1969e-01, PNorm = 63.2845, GNorm = 1.3249, lr_0 = 1.4941e-04
Loss = 3.8027e-01, PNorm = 63.2863, GNorm = 1.5530, lr_0 = 1.4931e-04
Loss = 3.6284e-01, PNorm = 63.2887, GNorm = 1.0571, lr_0 = 1.4920e-04
Loss = 3.0706e-01, PNorm = 63.2905, GNorm = 1.5277, lr_0 = 1.4910e-04
Loss = 3.3635e-01, PNorm = 63.2904, GNorm = 1.7697, lr_0 = 1.4900e-04
Loss = 3.6746e-01, PNorm = 63.2908, GNorm = 1.4396, lr_0 = 1.4890e-04
Loss = 3.4506e-01, PNorm = 63.2919, GNorm = 1.3132, lr_0 = 1.4880e-04
Loss = 3.2777e-01, PNorm = 63.2928, GNorm = 1.5109, lr_0 = 1.4869e-04
Loss = 3.6530e-01, PNorm = 63.2937, GNorm = 1.5106, lr_0 = 1.4859e-04
Loss = 3.1427e-01, PNorm = 63.2956, GNorm = 1.1801, lr_0 = 1.4849e-04
Loss = 3.0867e-01, PNorm = 63.2980, GNorm = 1.4366, lr_0 = 1.4839e-04
Loss = 3.6975e-01, PNorm = 63.2977, GNorm = 1.3170, lr_0 = 1.4829e-04
Loss = 3.9296e-01, PNorm = 63.2966, GNorm = 1.3627, lr_0 = 1.4818e-04
Loss = 3.8102e-01, PNorm = 63.2978, GNorm = 1.2828, lr_0 = 1.4808e-04
Loss = 3.4114e-01, PNorm = 63.2992, GNorm = 1.1900, lr_0 = 1.4798e-04
Loss = 3.5030e-01, PNorm = 63.3007, GNorm = 1.8231, lr_0 = 1.4788e-04
Loss = 3.6816e-01, PNorm = 63.3032, GNorm = 1.5869, lr_0 = 1.4778e-04
Loss = 2.9066e-01, PNorm = 63.3042, GNorm = 1.5060, lr_0 = 1.4768e-04
Loss = 3.1654e-01, PNorm = 63.3072, GNorm = 1.9759, lr_0 = 1.4758e-04
Loss = 3.8547e-01, PNorm = 63.3087, GNorm = 1.9751, lr_0 = 1.4748e-04
Loss = 3.6345e-01, PNorm = 63.3080, GNorm = 1.3204, lr_0 = 1.4737e-04
Loss = 3.2579e-01, PNorm = 63.3107, GNorm = 1.7380, lr_0 = 1.4727e-04
Loss = 3.7517e-01, PNorm = 63.3120, GNorm = 1.5789, lr_0 = 1.4717e-04
Loss = 3.5528e-01, PNorm = 63.3122, GNorm = 1.2316, lr_0 = 1.4707e-04
Loss = 3.8229e-01, PNorm = 63.3129, GNorm = 1.2138, lr_0 = 1.4697e-04
Loss = 3.2865e-01, PNorm = 63.3134, GNorm = 1.7845, lr_0 = 1.4687e-04
Loss = 3.6070e-01, PNorm = 63.3135, GNorm = 1.0958, lr_0 = 1.4677e-04
Loss = 3.3260e-01, PNorm = 63.3133, GNorm = 1.2749, lr_0 = 1.4667e-04
Loss = 3.5544e-01, PNorm = 63.3142, GNorm = 1.1791, lr_0 = 1.4657e-04
Loss = 3.2279e-01, PNorm = 63.3155, GNorm = 1.7454, lr_0 = 1.4647e-04
Loss = 3.3132e-01, PNorm = 63.3167, GNorm = 1.2925, lr_0 = 1.4637e-04
Loss = 3.1448e-01, PNorm = 63.3184, GNorm = 1.7984, lr_0 = 1.4627e-04
Loss = 3.4227e-01, PNorm = 63.3194, GNorm = 1.0551, lr_0 = 1.4617e-04
Loss = 3.2106e-01, PNorm = 63.3210, GNorm = 1.1237, lr_0 = 1.4607e-04
Loss = 3.1498e-01, PNorm = 63.3225, GNorm = 1.3523, lr_0 = 1.4597e-04
Loss = 3.4774e-01, PNorm = 63.3241, GNorm = 1.5655, lr_0 = 1.4587e-04
Loss = 3.7583e-01, PNorm = 63.3244, GNorm = 1.7114, lr_0 = 1.4577e-04
Loss = 2.9633e-01, PNorm = 63.3247, GNorm = 1.1755, lr_0 = 1.4567e-04
Loss = 3.3029e-01, PNorm = 63.3282, GNorm = 1.5045, lr_0 = 1.4557e-04
Loss = 3.2494e-01, PNorm = 63.3294, GNorm = 1.3338, lr_0 = 1.4547e-04
Loss = 3.5613e-01, PNorm = 63.3278, GNorm = 1.7537, lr_0 = 1.4537e-04
Loss = 3.6964e-01, PNorm = 63.3282, GNorm = 1.4889, lr_0 = 1.4527e-04
Loss = 2.9173e-01, PNorm = 63.3308, GNorm = 1.0328, lr_0 = 1.4517e-04
Loss = 3.1778e-01, PNorm = 63.3349, GNorm = 1.1720, lr_0 = 1.4507e-04
Loss = 2.9481e-01, PNorm = 63.3366, GNorm = 2.0961, lr_0 = 1.4497e-04
Loss = 3.3780e-01, PNorm = 63.3389, GNorm = 1.8469, lr_0 = 1.4487e-04
Loss = 3.8742e-01, PNorm = 63.3410, GNorm = 1.8763, lr_0 = 1.4477e-04
Loss = 4.0333e-01, PNorm = 63.3420, GNorm = 1.9134, lr_0 = 1.4467e-04
Loss = 3.3229e-01, PNorm = 63.3436, GNorm = 1.8801, lr_0 = 1.4457e-04
Loss = 2.8365e-01, PNorm = 63.3437, GNorm = 1.2916, lr_0 = 1.4447e-04
Loss = 3.3284e-01, PNorm = 63.3424, GNorm = 1.5332, lr_0 = 1.4438e-04
Loss = 3.5879e-01, PNorm = 63.3437, GNorm = 1.2394, lr_0 = 1.4428e-04
Loss = 3.4588e-01, PNorm = 63.3440, GNorm = 1.7674, lr_0 = 1.4418e-04
Loss = 3.7724e-01, PNorm = 63.3461, GNorm = 1.6405, lr_0 = 1.4408e-04
Loss = 3.2797e-01, PNorm = 63.3484, GNorm = 1.0142, lr_0 = 1.4398e-04
Loss = 3.5708e-01, PNorm = 63.3480, GNorm = 1.4651, lr_0 = 1.4388e-04
Loss = 3.9728e-01, PNorm = 63.3491, GNorm = 1.3212, lr_0 = 1.4378e-04
Loss = 3.7283e-01, PNorm = 63.3491, GNorm = 1.3627, lr_0 = 1.4368e-04
Loss = 3.2241e-01, PNorm = 63.3513, GNorm = 1.0059, lr_0 = 1.4359e-04
Loss = 3.6658e-01, PNorm = 63.3532, GNorm = 1.2146, lr_0 = 1.4349e-04
Loss = 3.7034e-01, PNorm = 63.3554, GNorm = 0.9717, lr_0 = 1.4339e-04
Loss = 3.7907e-01, PNorm = 63.3571, GNorm = 2.0041, lr_0 = 1.4329e-04
Loss = 3.6901e-01, PNorm = 63.3567, GNorm = 1.3524, lr_0 = 1.4319e-04
Loss = 3.6707e-01, PNorm = 63.3579, GNorm = 1.4854, lr_0 = 1.4310e-04
Loss = 3.2471e-01, PNorm = 63.3593, GNorm = 1.3333, lr_0 = 1.4300e-04
Loss = 3.5213e-01, PNorm = 63.3604, GNorm = 1.5510, lr_0 = 1.4290e-04
Loss = 3.8957e-01, PNorm = 63.3622, GNorm = 2.1541, lr_0 = 1.4280e-04
Loss = 3.8510e-01, PNorm = 63.3643, GNorm = 1.5429, lr_0 = 1.4270e-04
Loss = 3.7233e-01, PNorm = 63.3658, GNorm = 1.0095, lr_0 = 1.4261e-04
Loss = 3.0784e-01, PNorm = 63.3669, GNorm = 1.2078, lr_0 = 1.4251e-04
Loss = 3.5088e-01, PNorm = 63.3688, GNorm = 1.0961, lr_0 = 1.4241e-04
Loss = 3.0536e-01, PNorm = 63.3715, GNorm = 1.2683, lr_0 = 1.4231e-04
Loss = 3.4859e-01, PNorm = 63.3731, GNorm = 1.2529, lr_0 = 1.4222e-04
Loss = 3.5269e-01, PNorm = 63.3736, GNorm = 1.2969, lr_0 = 1.4212e-04
Loss = 3.2842e-01, PNorm = 63.3757, GNorm = 1.3025, lr_0 = 1.4202e-04
Loss = 3.6115e-01, PNorm = 63.3762, GNorm = 1.5615, lr_0 = 1.4192e-04
Loss = 3.4289e-01, PNorm = 63.3770, GNorm = 1.2917, lr_0 = 1.4183e-04
Loss = 3.2599e-01, PNorm = 63.3785, GNorm = 1.8367, lr_0 = 1.4173e-04
Loss = 3.8457e-01, PNorm = 63.3798, GNorm = 1.1958, lr_0 = 1.4163e-04
Loss = 3.4199e-01, PNorm = 63.3807, GNorm = 1.1909, lr_0 = 1.4153e-04
Loss = 3.8807e-01, PNorm = 63.3834, GNorm = 1.4846, lr_0 = 1.4144e-04
Loss = 3.6732e-01, PNorm = 63.3846, GNorm = 1.8275, lr_0 = 1.4134e-04
Loss = 3.2730e-01, PNorm = 63.3855, GNorm = 1.8667, lr_0 = 1.4124e-04
Loss = 3.2665e-01, PNorm = 63.3877, GNorm = 1.0541, lr_0 = 1.4115e-04
Loss = 3.5844e-01, PNorm = 63.3881, GNorm = 1.1312, lr_0 = 1.4105e-04
Loss = 3.3442e-01, PNorm = 63.3898, GNorm = 1.6446, lr_0 = 1.4095e-04
Loss = 3.8379e-01, PNorm = 63.3927, GNorm = 1.4411, lr_0 = 1.4086e-04
Loss = 3.3403e-01, PNorm = 63.3951, GNorm = 1.5855, lr_0 = 1.4076e-04
Loss = 3.3418e-01, PNorm = 63.3983, GNorm = 1.5403, lr_0 = 1.4066e-04
Loss = 3.2343e-01, PNorm = 63.3991, GNorm = 1.4483, lr_0 = 1.4057e-04
Loss = 4.1101e-01, PNorm = 63.3998, GNorm = 1.1995, lr_0 = 1.4047e-04
Loss = 4.1184e-01, PNorm = 63.4011, GNorm = 1.2709, lr_0 = 1.4038e-04
Loss = 3.6615e-01, PNorm = 63.4016, GNorm = 0.9043, lr_0 = 1.4028e-04
Loss = 3.9211e-01, PNorm = 63.4045, GNorm = 1.4419, lr_0 = 1.4018e-04
Loss = 3.3312e-01, PNorm = 63.4055, GNorm = 1.7935, lr_0 = 1.4009e-04
Loss = 3.4853e-01, PNorm = 63.4064, GNorm = 1.9672, lr_0 = 1.3999e-04
Loss = 3.5920e-01, PNorm = 63.4069, GNorm = 1.6974, lr_0 = 1.3990e-04
Loss = 3.3619e-01, PNorm = 63.4073, GNorm = 1.4642, lr_0 = 1.3980e-04
Loss = 3.2969e-01, PNorm = 63.4082, GNorm = 2.1699, lr_0 = 1.3970e-04
Loss = 3.6836e-01, PNorm = 63.4087, GNorm = 1.7611, lr_0 = 1.3961e-04
Loss = 3.6019e-01, PNorm = 63.4106, GNorm = 1.3374, lr_0 = 1.3951e-04
Loss = 3.3727e-01, PNorm = 63.4126, GNorm = 1.3807, lr_0 = 1.3942e-04
Loss = 3.2177e-01, PNorm = 63.4158, GNorm = 1.2783, lr_0 = 1.3932e-04
Loss = 3.5848e-01, PNorm = 63.4168, GNorm = 1.1540, lr_0 = 1.3923e-04
Loss = 3.3447e-01, PNorm = 63.4183, GNorm = 1.2477, lr_0 = 1.3913e-04
Loss = 4.0066e-01, PNorm = 63.4190, GNorm = 2.2901, lr_0 = 1.3904e-04
Loss = 3.5589e-01, PNorm = 63.4201, GNorm = 1.8172, lr_0 = 1.3894e-04
Validation mae = 0.112285
Epoch 26
Loss = 3.4296e-01, PNorm = 63.4219, GNorm = 1.3953, lr_0 = 1.3884e-04
Loss = 3.3461e-01, PNorm = 63.4248, GNorm = 1.9375, lr_0 = 1.3875e-04
Loss = 2.9262e-01, PNorm = 63.4267, GNorm = 1.5969, lr_0 = 1.3865e-04
Loss = 3.8355e-01, PNorm = 63.4267, GNorm = 1.2910, lr_0 = 1.3856e-04
Loss = 3.3660e-01, PNorm = 63.4278, GNorm = 1.0981, lr_0 = 1.3846e-04
Loss = 3.4258e-01, PNorm = 63.4293, GNorm = 1.1484, lr_0 = 1.3837e-04
Loss = 3.4220e-01, PNorm = 63.4303, GNorm = 1.6819, lr_0 = 1.3828e-04
Loss = 3.4537e-01, PNorm = 63.4330, GNorm = 1.5218, lr_0 = 1.3818e-04
Loss = 3.7658e-01, PNorm = 63.4344, GNorm = 1.4764, lr_0 = 1.3809e-04
Loss = 3.5894e-01, PNorm = 63.4348, GNorm = 1.1377, lr_0 = 1.3799e-04
Loss = 3.3167e-01, PNorm = 63.4361, GNorm = 1.0881, lr_0 = 1.3790e-04
Loss = 3.1135e-01, PNorm = 63.4380, GNorm = 1.3712, lr_0 = 1.3780e-04
Loss = 3.2221e-01, PNorm = 63.4415, GNorm = 1.7758, lr_0 = 1.3771e-04
Loss = 3.5008e-01, PNorm = 63.4434, GNorm = 1.6976, lr_0 = 1.3761e-04
Loss = 2.8624e-01, PNorm = 63.4434, GNorm = 1.3342, lr_0 = 1.3752e-04
Loss = 3.0371e-01, PNorm = 63.4437, GNorm = 1.6372, lr_0 = 1.3742e-04
Loss = 3.4430e-01, PNorm = 63.4449, GNorm = 1.5921, lr_0 = 1.3733e-04
Loss = 2.8658e-01, PNorm = 63.4457, GNorm = 1.3071, lr_0 = 1.3724e-04
Loss = 3.8941e-01, PNorm = 63.4451, GNorm = 1.3492, lr_0 = 1.3714e-04
Loss = 3.1639e-01, PNorm = 63.4466, GNorm = 1.6417, lr_0 = 1.3705e-04
Loss = 3.7367e-01, PNorm = 63.4477, GNorm = 1.5262, lr_0 = 1.3695e-04
Loss = 3.4193e-01, PNorm = 63.4481, GNorm = 1.5897, lr_0 = 1.3686e-04
Loss = 3.7119e-01, PNorm = 63.4490, GNorm = 1.7416, lr_0 = 1.3677e-04
Loss = 3.7737e-01, PNorm = 63.4514, GNorm = 1.5035, lr_0 = 1.3667e-04
Loss = 3.4591e-01, PNorm = 63.4515, GNorm = 1.7272, lr_0 = 1.3658e-04
Loss = 3.9410e-01, PNorm = 63.4517, GNorm = 1.6367, lr_0 = 1.3649e-04
Loss = 3.2188e-01, PNorm = 63.4548, GNorm = 1.4023, lr_0 = 1.3639e-04
Loss = 3.2711e-01, PNorm = 63.4556, GNorm = 1.1645, lr_0 = 1.3630e-04
Loss = 3.2836e-01, PNorm = 63.4566, GNorm = 1.5231, lr_0 = 1.3621e-04
Loss = 3.2537e-01, PNorm = 63.4587, GNorm = 1.6017, lr_0 = 1.3611e-04
Loss = 3.1822e-01, PNorm = 63.4592, GNorm = 1.3327, lr_0 = 1.3602e-04
Loss = 3.9004e-01, PNorm = 63.4586, GNorm = 1.2446, lr_0 = 1.3593e-04
Loss = 3.7777e-01, PNorm = 63.4605, GNorm = 1.3205, lr_0 = 1.3583e-04
Loss = 3.7192e-01, PNorm = 63.4646, GNorm = 1.7278, lr_0 = 1.3574e-04
Loss = 3.1078e-01, PNorm = 63.4661, GNorm = 1.2993, lr_0 = 1.3565e-04
Loss = 2.9968e-01, PNorm = 63.4673, GNorm = 1.2791, lr_0 = 1.3555e-04
Loss = 3.0948e-01, PNorm = 63.4690, GNorm = 1.4717, lr_0 = 1.3546e-04
Loss = 3.3638e-01, PNorm = 63.4706, GNorm = 1.7080, lr_0 = 1.3537e-04
Loss = 3.0174e-01, PNorm = 63.4706, GNorm = 1.9847, lr_0 = 1.3528e-04
Loss = 3.7844e-01, PNorm = 63.4728, GNorm = 1.5624, lr_0 = 1.3518e-04
Loss = 3.9031e-01, PNorm = 63.4770, GNorm = 2.1496, lr_0 = 1.3509e-04
Loss = 4.0241e-01, PNorm = 63.4785, GNorm = 2.1964, lr_0 = 1.3500e-04
Loss = 3.8521e-01, PNorm = 63.4799, GNorm = 1.5723, lr_0 = 1.3491e-04
Loss = 3.4402e-01, PNorm = 63.4830, GNorm = 1.0720, lr_0 = 1.3481e-04
Loss = 3.3697e-01, PNorm = 63.4844, GNorm = 1.3384, lr_0 = 1.3472e-04
Loss = 3.4983e-01, PNorm = 63.4827, GNorm = 1.6599, lr_0 = 1.3463e-04
Loss = 3.9646e-01, PNorm = 63.4838, GNorm = 1.5035, lr_0 = 1.3454e-04
Loss = 3.6305e-01, PNorm = 63.4839, GNorm = 2.0815, lr_0 = 1.3444e-04
Loss = 3.6408e-01, PNorm = 63.4823, GNorm = 1.5100, lr_0 = 1.3435e-04
Loss = 3.5738e-01, PNorm = 63.4832, GNorm = 1.4638, lr_0 = 1.3426e-04
Loss = 3.3539e-01, PNorm = 63.4846, GNorm = 1.3423, lr_0 = 1.3417e-04
Loss = 3.8124e-01, PNorm = 63.4838, GNorm = 1.4219, lr_0 = 1.3408e-04
Loss = 3.9261e-01, PNorm = 63.4833, GNorm = 1.7181, lr_0 = 1.3398e-04
Loss = 3.2608e-01, PNorm = 63.4851, GNorm = 2.1714, lr_0 = 1.3389e-04
Loss = 3.2569e-01, PNorm = 63.4841, GNorm = 1.3427, lr_0 = 1.3380e-04
Loss = 3.4185e-01, PNorm = 63.4869, GNorm = 1.3801, lr_0 = 1.3371e-04
Loss = 3.1620e-01, PNorm = 63.4882, GNorm = 1.4096, lr_0 = 1.3362e-04
Loss = 3.3906e-01, PNorm = 63.4893, GNorm = 1.6107, lr_0 = 1.3353e-04
Loss = 3.2022e-01, PNorm = 63.4920, GNorm = 1.4647, lr_0 = 1.3343e-04
Loss = 3.0475e-01, PNorm = 63.4934, GNorm = 1.2631, lr_0 = 1.3334e-04
Loss = 3.8182e-01, PNorm = 63.4952, GNorm = 1.5121, lr_0 = 1.3325e-04
Loss = 3.3040e-01, PNorm = 63.4972, GNorm = 1.6730, lr_0 = 1.3316e-04
Loss = 3.7398e-01, PNorm = 63.4999, GNorm = 1.9978, lr_0 = 1.3307e-04
Loss = 3.5114e-01, PNorm = 63.5022, GNorm = 1.7654, lr_0 = 1.3298e-04
Loss = 3.8268e-01, PNorm = 63.5022, GNorm = 1.4648, lr_0 = 1.3289e-04
Loss = 3.7442e-01, PNorm = 63.5030, GNorm = 1.6463, lr_0 = 1.3280e-04
Loss = 3.5269e-01, PNorm = 63.5034, GNorm = 1.5609, lr_0 = 1.3270e-04
Loss = 3.6951e-01, PNorm = 63.5037, GNorm = 1.5869, lr_0 = 1.3261e-04
Loss = 3.6909e-01, PNorm = 63.5048, GNorm = 1.6263, lr_0 = 1.3252e-04
Loss = 3.3047e-01, PNorm = 63.5042, GNorm = 1.1325, lr_0 = 1.3243e-04
Loss = 3.7391e-01, PNorm = 63.5020, GNorm = 1.6358, lr_0 = 1.3234e-04
Loss = 3.3264e-01, PNorm = 63.5023, GNorm = 1.2188, lr_0 = 1.3225e-04
Loss = 3.3545e-01, PNorm = 63.5032, GNorm = 2.2904, lr_0 = 1.3216e-04
Loss = 3.7877e-01, PNorm = 63.5052, GNorm = 2.1009, lr_0 = 1.3207e-04
Loss = 3.3403e-01, PNorm = 63.5062, GNorm = 1.5140, lr_0 = 1.3198e-04
Loss = 3.3077e-01, PNorm = 63.5080, GNorm = 1.3848, lr_0 = 1.3189e-04
Loss = 3.1388e-01, PNorm = 63.5088, GNorm = 1.1581, lr_0 = 1.3180e-04
Loss = 3.4663e-01, PNorm = 63.5089, GNorm = 1.3135, lr_0 = 1.3171e-04
Loss = 4.3095e-01, PNorm = 63.5094, GNorm = 1.2819, lr_0 = 1.3162e-04
Loss = 3.7075e-01, PNorm = 63.5111, GNorm = 1.2894, lr_0 = 1.3153e-04
Loss = 3.3495e-01, PNorm = 63.5126, GNorm = 1.7013, lr_0 = 1.3144e-04
Loss = 3.7042e-01, PNorm = 63.5136, GNorm = 0.9602, lr_0 = 1.3135e-04
Loss = 3.6590e-01, PNorm = 63.5133, GNorm = 1.6150, lr_0 = 1.3126e-04
Loss = 3.7972e-01, PNorm = 63.5135, GNorm = 1.2266, lr_0 = 1.3117e-04
Loss = 3.7922e-01, PNorm = 63.5169, GNorm = 1.3192, lr_0 = 1.3108e-04
Loss = 3.1185e-01, PNorm = 63.5196, GNorm = 1.6235, lr_0 = 1.3099e-04
Loss = 3.3430e-01, PNorm = 63.5193, GNorm = 0.9534, lr_0 = 1.3090e-04
Loss = 4.0293e-01, PNorm = 63.5193, GNorm = 1.6560, lr_0 = 1.3081e-04
Loss = 3.5866e-01, PNorm = 63.5206, GNorm = 1.4363, lr_0 = 1.3072e-04
Loss = 3.6473e-01, PNorm = 63.5212, GNorm = 1.6368, lr_0 = 1.3063e-04
Loss = 2.9532e-01, PNorm = 63.5222, GNorm = 1.3371, lr_0 = 1.3054e-04
Loss = 3.5450e-01, PNorm = 63.5246, GNorm = 1.4370, lr_0 = 1.3045e-04
Loss = 3.2848e-01, PNorm = 63.5257, GNorm = 1.5509, lr_0 = 1.3036e-04
Loss = 3.4274e-01, PNorm = 63.5246, GNorm = 1.2560, lr_0 = 1.3027e-04
Loss = 3.9815e-01, PNorm = 63.5241, GNorm = 1.5021, lr_0 = 1.3018e-04
Loss = 3.3022e-01, PNorm = 63.5244, GNorm = 1.4306, lr_0 = 1.3009e-04
Loss = 3.3496e-01, PNorm = 63.5270, GNorm = 1.4210, lr_0 = 1.3000e-04
Loss = 3.4708e-01, PNorm = 63.5283, GNorm = 1.1995, lr_0 = 1.2992e-04
Loss = 3.7005e-01, PNorm = 63.5298, GNorm = 1.6143, lr_0 = 1.2983e-04
Loss = 3.1515e-01, PNorm = 63.5301, GNorm = 1.1245, lr_0 = 1.2974e-04
Loss = 2.9115e-01, PNorm = 63.5316, GNorm = 1.0187, lr_0 = 1.2965e-04
Loss = 3.8219e-01, PNorm = 63.5338, GNorm = 1.2235, lr_0 = 1.2956e-04
Loss = 4.0856e-01, PNorm = 63.5347, GNorm = 1.9793, lr_0 = 1.2947e-04
Loss = 3.3712e-01, PNorm = 63.5348, GNorm = 1.3877, lr_0 = 1.2938e-04
Loss = 3.0280e-01, PNorm = 63.5351, GNorm = 1.0226, lr_0 = 1.2929e-04
Loss = 3.7699e-01, PNorm = 63.5364, GNorm = 1.8063, lr_0 = 1.2921e-04
Loss = 3.0155e-01, PNorm = 63.5374, GNorm = 1.0110, lr_0 = 1.2912e-04
Loss = 2.9970e-01, PNorm = 63.5374, GNorm = 1.2821, lr_0 = 1.2903e-04
Loss = 2.7818e-01, PNorm = 63.5389, GNorm = 2.0664, lr_0 = 1.2894e-04
Loss = 3.1984e-01, PNorm = 63.5414, GNorm = 1.7649, lr_0 = 1.2885e-04
Loss = 3.4955e-01, PNorm = 63.5414, GNorm = 1.5641, lr_0 = 1.2876e-04
Loss = 3.6385e-01, PNorm = 63.5440, GNorm = 1.0425, lr_0 = 1.2867e-04
Loss = 3.2363e-01, PNorm = 63.5442, GNorm = 1.3770, lr_0 = 1.2859e-04
Loss = 3.0342e-01, PNorm = 63.5440, GNorm = 1.9320, lr_0 = 1.2850e-04
Loss = 2.9503e-01, PNorm = 63.5438, GNorm = 1.1548, lr_0 = 1.2841e-04
Loss = 3.6879e-01, PNorm = 63.5458, GNorm = 1.4853, lr_0 = 1.2832e-04
Loss = 4.0275e-01, PNorm = 63.5468, GNorm = 1.4881, lr_0 = 1.2823e-04
Loss = 3.6933e-01, PNorm = 63.5486, GNorm = 1.0658, lr_0 = 1.2815e-04
Loss = 3.8104e-01, PNorm = 63.5502, GNorm = 1.1589, lr_0 = 1.2806e-04
Loss = 3.5699e-01, PNorm = 63.5524, GNorm = 1.1369, lr_0 = 1.2797e-04
Validation mae = 0.111790
Epoch 27
Loss = 3.2417e-01, PNorm = 63.5518, GNorm = 1.2226, lr_0 = 1.2788e-04
Loss = 3.4020e-01, PNorm = 63.5510, GNorm = 1.4378, lr_0 = 1.2780e-04
Loss = 2.9811e-01, PNorm = 63.5529, GNorm = 1.2358, lr_0 = 1.2771e-04
Loss = 3.4273e-01, PNorm = 63.5551, GNorm = 1.4362, lr_0 = 1.2762e-04
Loss = 3.0129e-01, PNorm = 63.5558, GNorm = 1.3273, lr_0 = 1.2753e-04
Loss = 3.4892e-01, PNorm = 63.5569, GNorm = 1.2907, lr_0 = 1.2745e-04
Loss = 3.3816e-01, PNorm = 63.5590, GNorm = 1.8938, lr_0 = 1.2736e-04
Loss = 3.2563e-01, PNorm = 63.5605, GNorm = 1.4340, lr_0 = 1.2727e-04
Loss = 3.6502e-01, PNorm = 63.5612, GNorm = 1.3837, lr_0 = 1.2718e-04
Loss = 3.4348e-01, PNorm = 63.5625, GNorm = 1.0626, lr_0 = 1.2710e-04
Loss = 3.1373e-01, PNorm = 63.5641, GNorm = 1.4381, lr_0 = 1.2701e-04
Loss = 4.2832e-01, PNorm = 63.5652, GNorm = 1.3313, lr_0 = 1.2692e-04
Loss = 3.3664e-01, PNorm = 63.5666, GNorm = 2.0807, lr_0 = 1.2684e-04
Loss = 3.4564e-01, PNorm = 63.5669, GNorm = 1.0863, lr_0 = 1.2675e-04
Loss = 3.6923e-01, PNorm = 63.5675, GNorm = 1.8298, lr_0 = 1.2666e-04
Loss = 3.4221e-01, PNorm = 63.5685, GNorm = 1.3603, lr_0 = 1.2658e-04
Loss = 3.3039e-01, PNorm = 63.5694, GNorm = 1.4829, lr_0 = 1.2649e-04
Loss = 3.7305e-01, PNorm = 63.5689, GNorm = 1.0977, lr_0 = 1.2640e-04
Loss = 2.8581e-01, PNorm = 63.5694, GNorm = 1.0409, lr_0 = 1.2632e-04
Loss = 3.6344e-01, PNorm = 63.5729, GNorm = 1.4621, lr_0 = 1.2623e-04
Loss = 3.5902e-01, PNorm = 63.5751, GNorm = 1.5893, lr_0 = 1.2614e-04
Loss = 3.1252e-01, PNorm = 63.5751, GNorm = 1.4314, lr_0 = 1.2606e-04
Loss = 3.4107e-01, PNorm = 63.5761, GNorm = 1.4625, lr_0 = 1.2597e-04
Loss = 3.3270e-01, PNorm = 63.5766, GNorm = 1.8288, lr_0 = 1.2588e-04
Loss = 2.8728e-01, PNorm = 63.5781, GNorm = 1.0696, lr_0 = 1.2580e-04
Loss = 3.5267e-01, PNorm = 63.5779, GNorm = 1.2329, lr_0 = 1.2571e-04
Loss = 3.3327e-01, PNorm = 63.5800, GNorm = 1.1205, lr_0 = 1.2563e-04
Loss = 3.4070e-01, PNorm = 63.5827, GNorm = 1.0913, lr_0 = 1.2554e-04
Loss = 3.2060e-01, PNorm = 63.5839, GNorm = 1.2402, lr_0 = 1.2545e-04
Loss = 3.6334e-01, PNorm = 63.5854, GNorm = 1.2905, lr_0 = 1.2537e-04
Loss = 3.3100e-01, PNorm = 63.5861, GNorm = 1.1725, lr_0 = 1.2528e-04
Loss = 3.4957e-01, PNorm = 63.5871, GNorm = 2.1003, lr_0 = 1.2520e-04
Loss = 3.6383e-01, PNorm = 63.5866, GNorm = 1.8410, lr_0 = 1.2511e-04
Loss = 3.5761e-01, PNorm = 63.5864, GNorm = 1.7106, lr_0 = 1.2502e-04
Loss = 3.6965e-01, PNorm = 63.5870, GNorm = 1.7768, lr_0 = 1.2494e-04
Loss = 3.2752e-01, PNorm = 63.5883, GNorm = 1.7480, lr_0 = 1.2485e-04
Loss = 3.7618e-01, PNorm = 63.5886, GNorm = 1.4366, lr_0 = 1.2477e-04
Loss = 3.4255e-01, PNorm = 63.5890, GNorm = 1.9076, lr_0 = 1.2468e-04
Loss = 3.5818e-01, PNorm = 63.5910, GNorm = 1.6175, lr_0 = 1.2460e-04
Loss = 3.4144e-01, PNorm = 63.5952, GNorm = 1.5685, lr_0 = 1.2451e-04
Loss = 3.5617e-01, PNorm = 63.5961, GNorm = 1.2033, lr_0 = 1.2443e-04
Loss = 3.3905e-01, PNorm = 63.5968, GNorm = 1.3305, lr_0 = 1.2434e-04
Loss = 3.3749e-01, PNorm = 63.5994, GNorm = 1.8412, lr_0 = 1.2426e-04
Loss = 3.7106e-01, PNorm = 63.6002, GNorm = 1.6457, lr_0 = 1.2417e-04
Loss = 3.0758e-01, PNorm = 63.6025, GNorm = 1.1731, lr_0 = 1.2409e-04
Loss = 3.5121e-01, PNorm = 63.6042, GNorm = 1.6750, lr_0 = 1.2400e-04
Loss = 3.3558e-01, PNorm = 63.6043, GNorm = 1.4228, lr_0 = 1.2392e-04
Loss = 3.4188e-01, PNorm = 63.6058, GNorm = 1.4832, lr_0 = 1.2383e-04
Loss = 3.2438e-01, PNorm = 63.6071, GNorm = 1.7442, lr_0 = 1.2375e-04
Loss = 4.1845e-01, PNorm = 63.6068, GNorm = 1.5613, lr_0 = 1.2366e-04
Loss = 3.1465e-01, PNorm = 63.6088, GNorm = 1.4393, lr_0 = 1.2358e-04
Loss = 3.8803e-01, PNorm = 63.6119, GNorm = 2.0192, lr_0 = 1.2349e-04
Loss = 3.4751e-01, PNorm = 63.6138, GNorm = 1.8158, lr_0 = 1.2341e-04
Loss = 3.5873e-01, PNorm = 63.6146, GNorm = 1.7451, lr_0 = 1.2332e-04
Loss = 3.4060e-01, PNorm = 63.6165, GNorm = 1.6882, lr_0 = 1.2324e-04
Loss = 3.3487e-01, PNorm = 63.6178, GNorm = 1.3154, lr_0 = 1.2315e-04
Loss = 3.7735e-01, PNorm = 63.6186, GNorm = 1.5016, lr_0 = 1.2307e-04
Loss = 3.3183e-01, PNorm = 63.6189, GNorm = 1.1189, lr_0 = 1.2298e-04
Loss = 3.1261e-01, PNorm = 63.6188, GNorm = 1.2464, lr_0 = 1.2290e-04
Loss = 3.9889e-01, PNorm = 63.6196, GNorm = 1.4966, lr_0 = 1.2282e-04
Loss = 3.3494e-01, PNorm = 63.6207, GNorm = 1.4168, lr_0 = 1.2273e-04
Loss = 3.3663e-01, PNorm = 63.6220, GNorm = 1.0460, lr_0 = 1.2265e-04
Loss = 3.6131e-01, PNorm = 63.6228, GNorm = 1.4807, lr_0 = 1.2256e-04
Loss = 4.4447e-01, PNorm = 63.6241, GNorm = 1.6376, lr_0 = 1.2248e-04
Loss = 3.3846e-01, PNorm = 63.6254, GNorm = 1.1895, lr_0 = 1.2240e-04
Loss = 3.5967e-01, PNorm = 63.6264, GNorm = 1.2515, lr_0 = 1.2231e-04
Loss = 3.0704e-01, PNorm = 63.6275, GNorm = 1.2091, lr_0 = 1.2223e-04
Loss = 3.4606e-01, PNorm = 63.6288, GNorm = 1.1734, lr_0 = 1.2214e-04
Loss = 3.3946e-01, PNorm = 63.6308, GNorm = 1.4705, lr_0 = 1.2206e-04
Loss = 3.6758e-01, PNorm = 63.6323, GNorm = 1.8805, lr_0 = 1.2198e-04
Loss = 3.4670e-01, PNorm = 63.6315, GNorm = 1.0628, lr_0 = 1.2189e-04
Loss = 3.6739e-01, PNorm = 63.6320, GNorm = 1.3343, lr_0 = 1.2181e-04
Loss = 3.8540e-01, PNorm = 63.6340, GNorm = 1.3289, lr_0 = 1.2173e-04
Loss = 3.8528e-01, PNorm = 63.6357, GNorm = 1.1560, lr_0 = 1.2164e-04
Loss = 3.7070e-01, PNorm = 63.6364, GNorm = 1.7983, lr_0 = 1.2156e-04
Loss = 3.5314e-01, PNorm = 63.6375, GNorm = 1.6025, lr_0 = 1.2148e-04
Loss = 2.8355e-01, PNorm = 63.6381, GNorm = 1.4605, lr_0 = 1.2139e-04
Loss = 3.5682e-01, PNorm = 63.6368, GNorm = 1.5215, lr_0 = 1.2131e-04
Loss = 3.5836e-01, PNorm = 63.6354, GNorm = 1.6139, lr_0 = 1.2123e-04
Loss = 3.4136e-01, PNorm = 63.6357, GNorm = 1.5956, lr_0 = 1.2114e-04
Loss = 3.5076e-01, PNorm = 63.6368, GNorm = 1.2791, lr_0 = 1.2106e-04
Loss = 3.5453e-01, PNorm = 63.6372, GNorm = 1.1878, lr_0 = 1.2098e-04
Loss = 3.4693e-01, PNorm = 63.6386, GNorm = 1.7984, lr_0 = 1.2090e-04
Loss = 3.6812e-01, PNorm = 63.6405, GNorm = 1.6037, lr_0 = 1.2081e-04
Loss = 3.5774e-01, PNorm = 63.6413, GNorm = 1.3194, lr_0 = 1.2073e-04
Loss = 4.0304e-01, PNorm = 63.6418, GNorm = 1.4208, lr_0 = 1.2065e-04
Loss = 3.0450e-01, PNorm = 63.6418, GNorm = 0.9679, lr_0 = 1.2056e-04
Loss = 3.3443e-01, PNorm = 63.6431, GNorm = 1.3704, lr_0 = 1.2048e-04
Loss = 3.5124e-01, PNorm = 63.6437, GNorm = 1.2949, lr_0 = 1.2040e-04
Loss = 3.3473e-01, PNorm = 63.6446, GNorm = 1.7481, lr_0 = 1.2032e-04
Loss = 3.0239e-01, PNorm = 63.6464, GNorm = 1.3699, lr_0 = 1.2023e-04
Loss = 3.5506e-01, PNorm = 63.6486, GNorm = 1.7384, lr_0 = 1.2015e-04
Loss = 3.2847e-01, PNorm = 63.6500, GNorm = 1.4385, lr_0 = 1.2007e-04
Loss = 3.3484e-01, PNorm = 63.6504, GNorm = 1.1364, lr_0 = 1.1999e-04
Loss = 3.0221e-01, PNorm = 63.6513, GNorm = 1.1514, lr_0 = 1.1991e-04
Loss = 3.5030e-01, PNorm = 63.6524, GNorm = 1.8301, lr_0 = 1.1982e-04
Loss = 3.8011e-01, PNorm = 63.6540, GNorm = 1.7453, lr_0 = 1.1974e-04
Loss = 3.2681e-01, PNorm = 63.6541, GNorm = 1.8462, lr_0 = 1.1966e-04
Loss = 3.8912e-01, PNorm = 63.6522, GNorm = 1.5982, lr_0 = 1.1958e-04
Loss = 3.5791e-01, PNorm = 63.6520, GNorm = 1.8803, lr_0 = 1.1950e-04
Loss = 3.5367e-01, PNorm = 63.6514, GNorm = 1.1482, lr_0 = 1.1941e-04
Loss = 3.4596e-01, PNorm = 63.6528, GNorm = 1.6087, lr_0 = 1.1933e-04
Loss = 3.2126e-01, PNorm = 63.6544, GNorm = 1.4234, lr_0 = 1.1925e-04
Loss = 3.2661e-01, PNorm = 63.6559, GNorm = 1.4218, lr_0 = 1.1917e-04
Loss = 3.5745e-01, PNorm = 63.6573, GNorm = 1.5127, lr_0 = 1.1909e-04
Loss = 3.5055e-01, PNorm = 63.6583, GNorm = 2.1160, lr_0 = 1.1901e-04
Loss = 3.8642e-01, PNorm = 63.6581, GNorm = 1.4020, lr_0 = 1.1892e-04
Loss = 3.0781e-01, PNorm = 63.6594, GNorm = 0.9772, lr_0 = 1.1884e-04
Loss = 3.7413e-01, PNorm = 63.6605, GNorm = 1.1872, lr_0 = 1.1876e-04
Loss = 3.8435e-01, PNorm = 63.6622, GNorm = 1.5928, lr_0 = 1.1868e-04
Loss = 3.3428e-01, PNorm = 63.6641, GNorm = 1.3549, lr_0 = 1.1860e-04
Loss = 2.7853e-01, PNorm = 63.6647, GNorm = 1.5180, lr_0 = 1.1852e-04
Loss = 3.2306e-01, PNorm = 63.6651, GNorm = 1.6921, lr_0 = 1.1844e-04
Loss = 3.5077e-01, PNorm = 63.6662, GNorm = 1.6447, lr_0 = 1.1835e-04
Loss = 3.3153e-01, PNorm = 63.6664, GNorm = 1.9054, lr_0 = 1.1827e-04
Loss = 3.5264e-01, PNorm = 63.6669, GNorm = 1.3765, lr_0 = 1.1819e-04
Loss = 3.8034e-01, PNorm = 63.6678, GNorm = 1.8395, lr_0 = 1.1811e-04
Loss = 3.5571e-01, PNorm = 63.6701, GNorm = 1.7573, lr_0 = 1.1803e-04
Loss = 3.7604e-01, PNorm = 63.6719, GNorm = 2.0643, lr_0 = 1.1795e-04
Loss = 3.5092e-01, PNorm = 63.6737, GNorm = 1.3409, lr_0 = 1.1787e-04
Validation mae = 0.110943
Epoch 28
Loss = 3.8268e-01, PNorm = 63.6735, GNorm = 2.1612, lr_0 = 1.1779e-04
Loss = 3.3352e-01, PNorm = 63.6735, GNorm = 1.7505, lr_0 = 1.1771e-04
Loss = 3.2201e-01, PNorm = 63.6743, GNorm = 1.8311, lr_0 = 1.1763e-04
Loss = 3.8704e-01, PNorm = 63.6772, GNorm = 1.5979, lr_0 = 1.1755e-04
Loss = 3.0563e-01, PNorm = 63.6788, GNorm = 1.4893, lr_0 = 1.1747e-04
Loss = 3.4785e-01, PNorm = 63.6789, GNorm = 2.0942, lr_0 = 1.1739e-04
Loss = 3.4618e-01, PNorm = 63.6803, GNorm = 1.5536, lr_0 = 1.1730e-04
Loss = 3.3647e-01, PNorm = 63.6825, GNorm = 1.5703, lr_0 = 1.1722e-04
Loss = 3.3782e-01, PNorm = 63.6829, GNorm = 1.4852, lr_0 = 1.1714e-04
Loss = 3.4302e-01, PNorm = 63.6834, GNorm = 1.3854, lr_0 = 1.1706e-04
Loss = 4.0590e-01, PNorm = 63.6853, GNorm = 1.6327, lr_0 = 1.1698e-04
Loss = 3.3598e-01, PNorm = 63.6858, GNorm = 1.7171, lr_0 = 1.1690e-04
Loss = 3.3532e-01, PNorm = 63.6865, GNorm = 1.2303, lr_0 = 1.1682e-04
Loss = 3.7886e-01, PNorm = 63.6868, GNorm = 1.8421, lr_0 = 1.1674e-04
Loss = 3.5772e-01, PNorm = 63.6896, GNorm = 1.8025, lr_0 = 1.1666e-04
Loss = 3.2786e-01, PNorm = 63.6892, GNorm = 1.3070, lr_0 = 1.1658e-04
Loss = 3.3405e-01, PNorm = 63.6894, GNorm = 1.1403, lr_0 = 1.1650e-04
Loss = 3.4346e-01, PNorm = 63.6902, GNorm = 1.5762, lr_0 = 1.1642e-04
Loss = 3.1669e-01, PNorm = 63.6911, GNorm = 1.2501, lr_0 = 1.1634e-04
Loss = 3.5191e-01, PNorm = 63.6919, GNorm = 2.0413, lr_0 = 1.1626e-04
Loss = 3.8599e-01, PNorm = 63.6936, GNorm = 1.7191, lr_0 = 1.1618e-04
Loss = 3.5159e-01, PNorm = 63.6952, GNorm = 2.1775, lr_0 = 1.1611e-04
Loss = 3.2328e-01, PNorm = 63.6960, GNorm = 1.2395, lr_0 = 1.1603e-04
Loss = 3.7462e-01, PNorm = 63.6978, GNorm = 1.2836, lr_0 = 1.1595e-04
Loss = 3.5437e-01, PNorm = 63.6995, GNorm = 1.1010, lr_0 = 1.1587e-04
Loss = 3.9431e-01, PNorm = 63.6998, GNorm = 1.4168, lr_0 = 1.1579e-04
Loss = 3.5175e-01, PNorm = 63.7006, GNorm = 1.5441, lr_0 = 1.1571e-04
Loss = 3.5083e-01, PNorm = 63.7020, GNorm = 2.2502, lr_0 = 1.1563e-04
Loss = 4.0757e-01, PNorm = 63.7029, GNorm = 1.6669, lr_0 = 1.1555e-04
Loss = 3.1275e-01, PNorm = 63.7055, GNorm = 1.5972, lr_0 = 1.1547e-04
Loss = 3.6984e-01, PNorm = 63.7073, GNorm = 1.3273, lr_0 = 1.1539e-04
Loss = 3.0053e-01, PNorm = 63.7079, GNorm = 1.2915, lr_0 = 1.1531e-04
Loss = 3.1065e-01, PNorm = 63.7104, GNorm = 2.0904, lr_0 = 1.1523e-04
Loss = 3.3812e-01, PNorm = 63.7128, GNorm = 1.2830, lr_0 = 1.1515e-04
Loss = 3.5187e-01, PNorm = 63.7151, GNorm = 1.6041, lr_0 = 1.1508e-04
Loss = 3.3224e-01, PNorm = 63.7170, GNorm = 1.3973, lr_0 = 1.1500e-04
Loss = 3.1184e-01, PNorm = 63.7192, GNorm = 1.8622, lr_0 = 1.1492e-04
Loss = 3.4244e-01, PNorm = 63.7213, GNorm = 1.0735, lr_0 = 1.1484e-04
Loss = 4.1604e-01, PNorm = 63.7212, GNorm = 2.6002, lr_0 = 1.1476e-04
Loss = 3.3280e-01, PNorm = 63.7209, GNorm = 1.5788, lr_0 = 1.1468e-04
Loss = 3.1985e-01, PNorm = 63.7229, GNorm = 1.6593, lr_0 = 1.1460e-04
Loss = 3.5854e-01, PNorm = 63.7243, GNorm = 1.6509, lr_0 = 1.1452e-04
Loss = 3.0296e-01, PNorm = 63.7252, GNorm = 1.1107, lr_0 = 1.1445e-04
Loss = 3.0418e-01, PNorm = 63.7261, GNorm = 1.4569, lr_0 = 1.1437e-04
Loss = 3.5610e-01, PNorm = 63.7264, GNorm = 1.6181, lr_0 = 1.1429e-04
Loss = 3.2592e-01, PNorm = 63.7263, GNorm = 1.1640, lr_0 = 1.1421e-04
Loss = 3.3277e-01, PNorm = 63.7272, GNorm = 1.9163, lr_0 = 1.1413e-04
Loss = 3.5386e-01, PNorm = 63.7272, GNorm = 1.3819, lr_0 = 1.1405e-04
Loss = 3.8375e-01, PNorm = 63.7271, GNorm = 1.5013, lr_0 = 1.1398e-04
Loss = 3.4609e-01, PNorm = 63.7270, GNorm = 1.4730, lr_0 = 1.1390e-04
Loss = 4.1417e-01, PNorm = 63.7281, GNorm = 1.4149, lr_0 = 1.1382e-04
Loss = 3.3356e-01, PNorm = 63.7284, GNorm = 1.4110, lr_0 = 1.1374e-04
Loss = 2.9518e-01, PNorm = 63.7291, GNorm = 1.3043, lr_0 = 1.1366e-04
Loss = 3.5939e-01, PNorm = 63.7298, GNorm = 1.6826, lr_0 = 1.1359e-04
Loss = 3.3481e-01, PNorm = 63.7299, GNorm = 1.5126, lr_0 = 1.1351e-04
Loss = 3.4179e-01, PNorm = 63.7305, GNorm = 1.9787, lr_0 = 1.1343e-04
Loss = 3.5340e-01, PNorm = 63.7317, GNorm = 1.4764, lr_0 = 1.1335e-04
Loss = 3.4170e-01, PNorm = 63.7319, GNorm = 2.4388, lr_0 = 1.1328e-04
Loss = 3.5372e-01, PNorm = 63.7335, GNorm = 1.6830, lr_0 = 1.1320e-04
Loss = 3.2319e-01, PNorm = 63.7358, GNorm = 1.5912, lr_0 = 1.1312e-04
Loss = 3.4066e-01, PNorm = 63.7380, GNorm = 1.3357, lr_0 = 1.1304e-04
Loss = 3.8650e-01, PNorm = 63.7387, GNorm = 1.7010, lr_0 = 1.1297e-04
Loss = 3.8575e-01, PNorm = 63.7390, GNorm = 1.2302, lr_0 = 1.1289e-04
Loss = 3.1294e-01, PNorm = 63.7416, GNorm = 1.3435, lr_0 = 1.1281e-04
Loss = 3.6168e-01, PNorm = 63.7416, GNorm = 1.1616, lr_0 = 1.1273e-04
Loss = 3.7369e-01, PNorm = 63.7417, GNorm = 1.7264, lr_0 = 1.1266e-04
Loss = 3.2858e-01, PNorm = 63.7448, GNorm = 1.3869, lr_0 = 1.1258e-04
Loss = 3.6515e-01, PNorm = 63.7474, GNorm = 1.4131, lr_0 = 1.1250e-04
Loss = 2.8137e-01, PNorm = 63.7481, GNorm = 1.8127, lr_0 = 1.1243e-04
Loss = 3.5189e-01, PNorm = 63.7481, GNorm = 1.3492, lr_0 = 1.1235e-04
Loss = 3.7809e-01, PNorm = 63.7481, GNorm = 1.6792, lr_0 = 1.1227e-04
Loss = 3.3807e-01, PNorm = 63.7482, GNorm = 2.1965, lr_0 = 1.1219e-04
Loss = 3.1997e-01, PNorm = 63.7495, GNorm = 1.6920, lr_0 = 1.1212e-04
Loss = 3.4508e-01, PNorm = 63.7513, GNorm = 2.1395, lr_0 = 1.1204e-04
Loss = 3.7662e-01, PNorm = 63.7509, GNorm = 1.6725, lr_0 = 1.1196e-04
Loss = 3.5142e-01, PNorm = 63.7519, GNorm = 1.3949, lr_0 = 1.1189e-04
Loss = 3.2468e-01, PNorm = 63.7550, GNorm = 1.4900, lr_0 = 1.1181e-04
Loss = 3.4506e-01, PNorm = 63.7561, GNorm = 1.7665, lr_0 = 1.1173e-04
Loss = 3.4852e-01, PNorm = 63.7556, GNorm = 2.0738, lr_0 = 1.1166e-04
Loss = 3.5731e-01, PNorm = 63.7575, GNorm = 1.4001, lr_0 = 1.1158e-04
Loss = 3.8448e-01, PNorm = 63.7598, GNorm = 1.8239, lr_0 = 1.1150e-04
Loss = 3.2936e-01, PNorm = 63.7608, GNorm = 1.4624, lr_0 = 1.1143e-04
Loss = 3.5597e-01, PNorm = 63.7616, GNorm = 1.1967, lr_0 = 1.1135e-04
Loss = 3.6772e-01, PNorm = 63.7621, GNorm = 1.3106, lr_0 = 1.1128e-04
Loss = 3.4082e-01, PNorm = 63.7620, GNorm = 1.6070, lr_0 = 1.1120e-04
Loss = 3.2043e-01, PNorm = 63.7617, GNorm = 2.3737, lr_0 = 1.1112e-04
Loss = 3.1512e-01, PNorm = 63.7634, GNorm = 1.3590, lr_0 = 1.1105e-04
Loss = 3.4918e-01, PNorm = 63.7643, GNorm = 1.2178, lr_0 = 1.1097e-04
Loss = 3.3459e-01, PNorm = 63.7646, GNorm = 1.5514, lr_0 = 1.1089e-04
Loss = 4.0190e-01, PNorm = 63.7664, GNorm = 1.6465, lr_0 = 1.1082e-04
Loss = 3.5125e-01, PNorm = 63.7684, GNorm = 2.0804, lr_0 = 1.1074e-04
Loss = 3.6320e-01, PNorm = 63.7698, GNorm = 1.2885, lr_0 = 1.1067e-04
Loss = 3.2355e-01, PNorm = 63.7695, GNorm = 1.5197, lr_0 = 1.1059e-04
Loss = 3.0537e-01, PNorm = 63.7709, GNorm = 1.3921, lr_0 = 1.1052e-04
Loss = 3.5039e-01, PNorm = 63.7721, GNorm = 1.3928, lr_0 = 1.1044e-04
Loss = 3.3289e-01, PNorm = 63.7725, GNorm = 1.6754, lr_0 = 1.1036e-04
Loss = 3.1986e-01, PNorm = 63.7742, GNorm = 1.8266, lr_0 = 1.1029e-04
Loss = 3.1081e-01, PNorm = 63.7763, GNorm = 1.4869, lr_0 = 1.1021e-04
Loss = 3.3515e-01, PNorm = 63.7784, GNorm = 1.1748, lr_0 = 1.1014e-04
Loss = 3.5678e-01, PNorm = 63.7791, GNorm = 1.8076, lr_0 = 1.1006e-04
Loss = 3.0867e-01, PNorm = 63.7797, GNorm = 1.1719, lr_0 = 1.0999e-04
Loss = 3.4467e-01, PNorm = 63.7800, GNorm = 1.4138, lr_0 = 1.0991e-04
Loss = 3.4418e-01, PNorm = 63.7800, GNorm = 1.9465, lr_0 = 1.0984e-04
Loss = 3.2522e-01, PNorm = 63.7813, GNorm = 1.3014, lr_0 = 1.0976e-04
Loss = 3.2375e-01, PNorm = 63.7828, GNorm = 1.5096, lr_0 = 1.0969e-04
Loss = 2.9670e-01, PNorm = 63.7847, GNorm = 1.0213, lr_0 = 1.0961e-04
Loss = 3.6856e-01, PNorm = 63.7885, GNorm = 1.0384, lr_0 = 1.0954e-04
Loss = 3.0772e-01, PNorm = 63.7905, GNorm = 1.7013, lr_0 = 1.0946e-04
Loss = 3.3312e-01, PNorm = 63.7903, GNorm = 1.3641, lr_0 = 1.0939e-04
Loss = 2.9362e-01, PNorm = 63.7922, GNorm = 1.2924, lr_0 = 1.0931e-04
Loss = 3.2701e-01, PNorm = 63.7934, GNorm = 1.3765, lr_0 = 1.0924e-04
Loss = 3.5428e-01, PNorm = 63.7936, GNorm = 1.1213, lr_0 = 1.0916e-04
Loss = 3.5623e-01, PNorm = 63.7938, GNorm = 1.3920, lr_0 = 1.0909e-04
Loss = 3.8389e-01, PNorm = 63.7941, GNorm = 1.0397, lr_0 = 1.0901e-04
Loss = 3.0693e-01, PNorm = 63.7948, GNorm = 1.3610, lr_0 = 1.0894e-04
Loss = 3.6309e-01, PNorm = 63.7958, GNorm = 1.7410, lr_0 = 1.0886e-04
Loss = 3.7894e-01, PNorm = 63.7953, GNorm = 1.7654, lr_0 = 1.0879e-04
Loss = 3.4448e-01, PNorm = 63.7968, GNorm = 1.7193, lr_0 = 1.0871e-04
Loss = 3.0676e-01, PNorm = 63.7979, GNorm = 1.1364, lr_0 = 1.0864e-04
Loss = 2.9331e-01, PNorm = 63.7994, GNorm = 1.2125, lr_0 = 1.0856e-04
Validation mae = 0.110915
Epoch 29
Loss = 3.1706e-01, PNorm = 63.7997, GNorm = 1.1923, lr_0 = 1.0849e-04
Loss = 3.6696e-01, PNorm = 63.7994, GNorm = 1.8401, lr_0 = 1.0841e-04
Loss = 4.6764e-01, PNorm = 63.8005, GNorm = 1.1874, lr_0 = 1.0834e-04
Loss = 3.6945e-01, PNorm = 63.8011, GNorm = 1.3985, lr_0 = 1.0827e-04
Loss = 3.2851e-01, PNorm = 63.8028, GNorm = 1.7122, lr_0 = 1.0819e-04
Loss = 3.2907e-01, PNorm = 63.8054, GNorm = 1.5204, lr_0 = 1.0812e-04
Loss = 3.4561e-01, PNorm = 63.8072, GNorm = 1.1896, lr_0 = 1.0804e-04
Loss = 4.2156e-01, PNorm = 63.8071, GNorm = 1.5515, lr_0 = 1.0797e-04
Loss = 3.5139e-01, PNorm = 63.8082, GNorm = 1.1013, lr_0 = 1.0790e-04
Loss = 3.4153e-01, PNorm = 63.8095, GNorm = 1.8290, lr_0 = 1.0782e-04
Loss = 3.9425e-01, PNorm = 63.8105, GNorm = 1.6179, lr_0 = 1.0775e-04
Loss = 3.3835e-01, PNorm = 63.8111, GNorm = 2.2091, lr_0 = 1.0767e-04
Loss = 3.0898e-01, PNorm = 63.8122, GNorm = 2.0178, lr_0 = 1.0760e-04
Loss = 3.0085e-01, PNorm = 63.8129, GNorm = 1.2896, lr_0 = 1.0753e-04
Loss = 3.3524e-01, PNorm = 63.8144, GNorm = 1.4086, lr_0 = 1.0745e-04
Loss = 3.5064e-01, PNorm = 63.8159, GNorm = 1.1222, lr_0 = 1.0738e-04
Loss = 3.0417e-01, PNorm = 63.8153, GNorm = 1.3046, lr_0 = 1.0731e-04
Loss = 3.7938e-01, PNorm = 63.8155, GNorm = 1.5868, lr_0 = 1.0723e-04
Loss = 3.5526e-01, PNorm = 63.8153, GNorm = 1.3034, lr_0 = 1.0716e-04
Loss = 3.5310e-01, PNorm = 63.8150, GNorm = 1.4572, lr_0 = 1.0709e-04
Loss = 3.2476e-01, PNorm = 63.8153, GNorm = 1.7757, lr_0 = 1.0701e-04
Loss = 3.4212e-01, PNorm = 63.8159, GNorm = 1.1866, lr_0 = 1.0694e-04
Loss = 3.2683e-01, PNorm = 63.8166, GNorm = 1.4344, lr_0 = 1.0687e-04
Loss = 3.5226e-01, PNorm = 63.8178, GNorm = 1.2703, lr_0 = 1.0679e-04
Loss = 3.3979e-01, PNorm = 63.8200, GNorm = 1.0857, lr_0 = 1.0672e-04
Loss = 3.6312e-01, PNorm = 63.8208, GNorm = 1.2376, lr_0 = 1.0665e-04
Loss = 3.4251e-01, PNorm = 63.8208, GNorm = 1.1841, lr_0 = 1.0657e-04
Loss = 3.5538e-01, PNorm = 63.8206, GNorm = 1.6620, lr_0 = 1.0650e-04
Loss = 3.3789e-01, PNorm = 63.8219, GNorm = 1.4654, lr_0 = 1.0643e-04
Loss = 3.6307e-01, PNorm = 63.8245, GNorm = 1.6891, lr_0 = 1.0635e-04
Loss = 3.2328e-01, PNorm = 63.8262, GNorm = 1.7436, lr_0 = 1.0628e-04
Loss = 3.8149e-01, PNorm = 63.8283, GNorm = 1.2184, lr_0 = 1.0621e-04
Loss = 3.5855e-01, PNorm = 63.8304, GNorm = 1.3992, lr_0 = 1.0614e-04
Loss = 4.4033e-01, PNorm = 63.8319, GNorm = 1.7329, lr_0 = 1.0606e-04
Loss = 3.2802e-01, PNorm = 63.8331, GNorm = 2.2941, lr_0 = 1.0599e-04
Loss = 2.7046e-01, PNorm = 63.8345, GNorm = 1.4031, lr_0 = 1.0592e-04
Loss = 2.9980e-01, PNorm = 63.8355, GNorm = 1.7292, lr_0 = 1.0585e-04
Loss = 3.2784e-01, PNorm = 63.8359, GNorm = 1.1724, lr_0 = 1.0577e-04
Loss = 2.9890e-01, PNorm = 63.8367, GNorm = 1.6096, lr_0 = 1.0570e-04
Loss = 3.4030e-01, PNorm = 63.8385, GNorm = 1.6685, lr_0 = 1.0563e-04
Loss = 3.7631e-01, PNorm = 63.8383, GNorm = 1.6032, lr_0 = 1.0556e-04
Loss = 3.7855e-01, PNorm = 63.8389, GNorm = 1.2021, lr_0 = 1.0548e-04
Loss = 3.7455e-01, PNorm = 63.8420, GNorm = 1.1183, lr_0 = 1.0541e-04
Loss = 3.3956e-01, PNorm = 63.8443, GNorm = 1.6547, lr_0 = 1.0534e-04
Loss = 3.6644e-01, PNorm = 63.8441, GNorm = 1.4895, lr_0 = 1.0527e-04
Loss = 3.3438e-01, PNorm = 63.8456, GNorm = 1.2572, lr_0 = 1.0519e-04
Loss = 3.5924e-01, PNorm = 63.8467, GNorm = 0.8606, lr_0 = 1.0512e-04
Loss = 3.2061e-01, PNorm = 63.8458, GNorm = 1.4153, lr_0 = 1.0505e-04
Loss = 3.8483e-01, PNorm = 63.8450, GNorm = 1.1179, lr_0 = 1.0498e-04
Loss = 3.9796e-01, PNorm = 63.8460, GNorm = 1.6177, lr_0 = 1.0491e-04
Loss = 3.6352e-01, PNorm = 63.8474, GNorm = 1.6587, lr_0 = 1.0483e-04
Loss = 3.0531e-01, PNorm = 63.8490, GNorm = 1.4225, lr_0 = 1.0476e-04
Loss = 3.5514e-01, PNorm = 63.8501, GNorm = 1.6541, lr_0 = 1.0469e-04
Loss = 3.2260e-01, PNorm = 63.8527, GNorm = 1.2353, lr_0 = 1.0462e-04
Loss = 3.5415e-01, PNorm = 63.8551, GNorm = 0.9795, lr_0 = 1.0455e-04
Loss = 3.2682e-01, PNorm = 63.8547, GNorm = 2.9454, lr_0 = 1.0448e-04
Loss = 3.2745e-01, PNorm = 63.8545, GNorm = 1.3732, lr_0 = 1.0440e-04
Loss = 3.4185e-01, PNorm = 63.8556, GNorm = 1.1971, lr_0 = 1.0433e-04
Loss = 3.6264e-01, PNorm = 63.8554, GNorm = 1.1991, lr_0 = 1.0426e-04
Loss = 3.3422e-01, PNorm = 63.8561, GNorm = 1.3871, lr_0 = 1.0419e-04
Loss = 2.9506e-01, PNorm = 63.8583, GNorm = 1.4338, lr_0 = 1.0412e-04
Loss = 3.3439e-01, PNorm = 63.8586, GNorm = 1.7147, lr_0 = 1.0405e-04
Loss = 3.0713e-01, PNorm = 63.8583, GNorm = 1.0762, lr_0 = 1.0398e-04
Loss = 3.3513e-01, PNorm = 63.8586, GNorm = 1.6938, lr_0 = 1.0391e-04
Loss = 3.1912e-01, PNorm = 63.8601, GNorm = 1.2847, lr_0 = 1.0383e-04
Loss = 3.2362e-01, PNorm = 63.8630, GNorm = 1.6145, lr_0 = 1.0376e-04
Loss = 3.5831e-01, PNorm = 63.8628, GNorm = 1.7887, lr_0 = 1.0369e-04
Loss = 3.2956e-01, PNorm = 63.8610, GNorm = 1.0195, lr_0 = 1.0362e-04
Loss = 3.5036e-01, PNorm = 63.8617, GNorm = 1.1585, lr_0 = 1.0355e-04
Loss = 3.6129e-01, PNorm = 63.8638, GNorm = 1.2950, lr_0 = 1.0348e-04
Loss = 3.7301e-01, PNorm = 63.8647, GNorm = 1.5598, lr_0 = 1.0341e-04
Loss = 3.5339e-01, PNorm = 63.8648, GNorm = 1.7229, lr_0 = 1.0334e-04
Loss = 3.1845e-01, PNorm = 63.8664, GNorm = 1.4416, lr_0 = 1.0327e-04
Loss = 3.1257e-01, PNorm = 63.8681, GNorm = 2.2322, lr_0 = 1.0320e-04
Loss = 3.0989e-01, PNorm = 63.8698, GNorm = 1.6063, lr_0 = 1.0312e-04
Loss = 2.9760e-01, PNorm = 63.8703, GNorm = 1.6194, lr_0 = 1.0305e-04
Loss = 3.5874e-01, PNorm = 63.8706, GNorm = 1.3893, lr_0 = 1.0298e-04
Loss = 3.3228e-01, PNorm = 63.8713, GNorm = 1.3931, lr_0 = 1.0291e-04
Loss = 3.1595e-01, PNorm = 63.8726, GNorm = 1.2555, lr_0 = 1.0284e-04
Loss = 3.5003e-01, PNorm = 63.8731, GNorm = 1.5171, lr_0 = 1.0277e-04
Loss = 3.0605e-01, PNorm = 63.8733, GNorm = 1.4713, lr_0 = 1.0270e-04
Loss = 3.3491e-01, PNorm = 63.8748, GNorm = 1.1453, lr_0 = 1.0263e-04
Loss = 3.1888e-01, PNorm = 63.8750, GNorm = 1.9497, lr_0 = 1.0256e-04
Loss = 3.6514e-01, PNorm = 63.8759, GNorm = 1.3417, lr_0 = 1.0249e-04
Loss = 3.2700e-01, PNorm = 63.8768, GNorm = 1.4962, lr_0 = 1.0242e-04
Loss = 3.4597e-01, PNorm = 63.8771, GNorm = 1.9267, lr_0 = 1.0235e-04
Loss = 3.4543e-01, PNorm = 63.8777, GNorm = 1.5274, lr_0 = 1.0228e-04
Loss = 2.8596e-01, PNorm = 63.8781, GNorm = 1.1280, lr_0 = 1.0221e-04
Loss = 3.1994e-01, PNorm = 63.8782, GNorm = 1.7975, lr_0 = 1.0214e-04
Loss = 3.2877e-01, PNorm = 63.8795, GNorm = 1.6566, lr_0 = 1.0207e-04
Loss = 3.5926e-01, PNorm = 63.8799, GNorm = 1.9488, lr_0 = 1.0200e-04
Loss = 3.5021e-01, PNorm = 63.8806, GNorm = 1.6573, lr_0 = 1.0193e-04
Loss = 3.5873e-01, PNorm = 63.8832, GNorm = 0.9534, lr_0 = 1.0186e-04
Loss = 3.6279e-01, PNorm = 63.8842, GNorm = 0.9836, lr_0 = 1.0179e-04
Loss = 3.7843e-01, PNorm = 63.8844, GNorm = 1.6705, lr_0 = 1.0172e-04
Loss = 3.2887e-01, PNorm = 63.8853, GNorm = 1.5680, lr_0 = 1.0165e-04
Loss = 3.4387e-01, PNorm = 63.8876, GNorm = 1.7966, lr_0 = 1.0158e-04
Loss = 3.8680e-01, PNorm = 63.8898, GNorm = 2.3655, lr_0 = 1.0151e-04
Loss = 3.1913e-01, PNorm = 63.8900, GNorm = 1.1293, lr_0 = 1.0144e-04
Loss = 2.9572e-01, PNorm = 63.8907, GNorm = 1.2356, lr_0 = 1.0137e-04
Loss = 3.8938e-01, PNorm = 63.8926, GNorm = 1.1161, lr_0 = 1.0130e-04
Loss = 3.1847e-01, PNorm = 63.8942, GNorm = 1.2219, lr_0 = 1.0123e-04
Loss = 3.9450e-01, PNorm = 63.8958, GNorm = 1.8421, lr_0 = 1.0116e-04
Loss = 2.8609e-01, PNorm = 63.8960, GNorm = 1.2679, lr_0 = 1.0110e-04
Loss = 3.1758e-01, PNorm = 63.8967, GNorm = 1.7483, lr_0 = 1.0103e-04
Loss = 3.1307e-01, PNorm = 63.8966, GNorm = 1.0994, lr_0 = 1.0096e-04
Loss = 3.6121e-01, PNorm = 63.8970, GNorm = 1.5549, lr_0 = 1.0089e-04
Loss = 3.2770e-01, PNorm = 63.8970, GNorm = 1.4219, lr_0 = 1.0082e-04
Loss = 3.6912e-01, PNorm = 63.8989, GNorm = 1.1216, lr_0 = 1.0075e-04
Loss = 3.1814e-01, PNorm = 63.9012, GNorm = 1.2652, lr_0 = 1.0068e-04
Loss = 3.3358e-01, PNorm = 63.9017, GNorm = 1.6072, lr_0 = 1.0061e-04
Loss = 3.4942e-01, PNorm = 63.9020, GNorm = 1.9469, lr_0 = 1.0054e-04
Loss = 4.0963e-01, PNorm = 63.9030, GNorm = 1.4251, lr_0 = 1.0047e-04
Loss = 3.1998e-01, PNorm = 63.9047, GNorm = 1.3715, lr_0 = 1.0041e-04
Loss = 3.4740e-01, PNorm = 63.9054, GNorm = 1.6635, lr_0 = 1.0034e-04
Loss = 3.8232e-01, PNorm = 63.9057, GNorm = 1.8936, lr_0 = 1.0027e-04
Loss = 2.9970e-01, PNorm = 63.9062, GNorm = 1.6507, lr_0 = 1.0020e-04
Loss = 3.8193e-01, PNorm = 63.9059, GNorm = 2.3410, lr_0 = 1.0013e-04
Loss = 3.1596e-01, PNorm = 63.9064, GNorm = 1.1143, lr_0 = 1.0006e-04
Loss = 3.7780e-01, PNorm = 63.9073, GNorm = 1.5194, lr_0 = 1.0000e-04
Validation mae = 0.110808
Model 0 best validation mae = 0.110806 on epoch 24
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110745
Ensemble test mae = 0.110745
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.1352e+00, PNorm = 38.1710, GNorm = 3.9123, lr_0 = 1.0413e-04
Loss = 1.0029e+00, PNorm = 38.1709, GNorm = 4.0037, lr_0 = 1.0788e-04
Loss = 9.2097e-01, PNorm = 38.1712, GNorm = 2.9262, lr_0 = 1.1163e-04
Loss = 9.6596e-01, PNorm = 38.1719, GNorm = 2.1370, lr_0 = 1.1537e-04
Loss = 9.4816e-01, PNorm = 38.1730, GNorm = 2.6681, lr_0 = 1.1913e-04
Loss = 1.0258e+00, PNorm = 38.1740, GNorm = 4.0350, lr_0 = 1.2287e-04
Loss = 1.0944e+00, PNorm = 38.1747, GNorm = 2.8500, lr_0 = 1.2663e-04
Loss = 8.2019e-01, PNorm = 38.1756, GNorm = 2.2810, lr_0 = 1.3038e-04
Loss = 7.6803e-01, PNorm = 38.1765, GNorm = 2.2771, lr_0 = 1.3413e-04
Loss = 7.5930e-01, PNorm = 38.1775, GNorm = 3.7232, lr_0 = 1.3788e-04
Loss = 9.0437e-01, PNorm = 38.1784, GNorm = 2.0557, lr_0 = 1.4163e-04
Loss = 9.3005e-01, PNorm = 38.1802, GNorm = 3.5281, lr_0 = 1.4537e-04
Loss = 8.4131e-01, PNorm = 38.1822, GNorm = 4.6554, lr_0 = 1.4913e-04
Loss = 8.6345e-01, PNorm = 38.1840, GNorm = 3.5209, lr_0 = 1.5288e-04
Loss = 9.0665e-01, PNorm = 38.1861, GNorm = 2.0184, lr_0 = 1.5662e-04
Loss = 7.5012e-01, PNorm = 38.1866, GNorm = 3.1401, lr_0 = 1.6038e-04
Loss = 8.7828e-01, PNorm = 38.1883, GNorm = 5.1054, lr_0 = 1.6412e-04
Loss = 9.1903e-01, PNorm = 38.1896, GNorm = 1.8749, lr_0 = 1.6788e-04
Loss = 8.5935e-01, PNorm = 38.1918, GNorm = 2.0705, lr_0 = 1.7163e-04
Loss = 7.8238e-01, PNorm = 38.1953, GNorm = 3.2514, lr_0 = 1.7538e-04
Loss = 8.5039e-01, PNorm = 38.1981, GNorm = 7.1580, lr_0 = 1.7913e-04
Loss = 8.0741e-01, PNorm = 38.1994, GNorm = 3.0638, lr_0 = 1.8288e-04
Loss = 7.1130e-01, PNorm = 38.2017, GNorm = 3.6314, lr_0 = 1.8662e-04
Loss = 8.5764e-01, PNorm = 38.2047, GNorm = 1.3360, lr_0 = 1.9038e-04
Loss = 7.9620e-01, PNorm = 38.2074, GNorm = 1.7733, lr_0 = 1.9413e-04
Loss = 7.2016e-01, PNorm = 38.2117, GNorm = 2.9566, lr_0 = 1.9788e-04
Loss = 7.6973e-01, PNorm = 38.2151, GNorm = 1.8690, lr_0 = 2.0163e-04
Loss = 7.2661e-01, PNorm = 38.2186, GNorm = 5.9796, lr_0 = 2.0537e-04
Loss = 7.8861e-01, PNorm = 38.2212, GNorm = 8.1792, lr_0 = 2.0913e-04
Loss = 7.6926e-01, PNorm = 38.2238, GNorm = 7.2374, lr_0 = 2.1288e-04
Loss = 7.1214e-01, PNorm = 38.2253, GNorm = 5.4783, lr_0 = 2.1663e-04
Loss = 7.5890e-01, PNorm = 38.2273, GNorm = 1.9619, lr_0 = 2.2038e-04
Loss = 6.4410e-01, PNorm = 38.2309, GNorm = 1.6440, lr_0 = 2.2412e-04
Loss = 7.0459e-01, PNorm = 38.2352, GNorm = 5.8916, lr_0 = 2.2787e-04
Loss = 7.8984e-01, PNorm = 38.2375, GNorm = 4.0219, lr_0 = 2.3163e-04
Loss = 7.9093e-01, PNorm = 38.2400, GNorm = 9.2692, lr_0 = 2.3538e-04
Loss = 7.2302e-01, PNorm = 38.2443, GNorm = 1.8064, lr_0 = 2.3913e-04
Loss = 6.6529e-01, PNorm = 38.2484, GNorm = 7.1962, lr_0 = 2.4288e-04
Loss = 7.2771e-01, PNorm = 38.2522, GNorm = 3.2584, lr_0 = 2.4662e-04
Loss = 7.7813e-01, PNorm = 38.2573, GNorm = 6.3587, lr_0 = 2.5038e-04
Loss = 7.9806e-01, PNorm = 38.2613, GNorm = 15.5434, lr_0 = 2.5413e-04
Loss = 8.2814e-01, PNorm = 38.2628, GNorm = 12.9111, lr_0 = 2.5788e-04
Loss = 7.4817e-01, PNorm = 38.2651, GNorm = 6.4472, lr_0 = 2.6163e-04
Loss = 8.2887e-01, PNorm = 38.2686, GNorm = 2.1704, lr_0 = 2.6537e-04
Loss = 7.0686e-01, PNorm = 38.2758, GNorm = 3.2378, lr_0 = 2.6912e-04
Loss = 6.6194e-01, PNorm = 38.2794, GNorm = 1.7883, lr_0 = 2.7288e-04
Loss = 7.0136e-01, PNorm = 38.2817, GNorm = 3.6503, lr_0 = 2.7663e-04
Loss = 6.9708e-01, PNorm = 38.2839, GNorm = 1.7095, lr_0 = 2.8038e-04
Loss = 7.2685e-01, PNorm = 38.2888, GNorm = 3.0017, lr_0 = 2.8413e-04
Loss = 6.6464e-01, PNorm = 38.2919, GNorm = 7.9963, lr_0 = 2.8787e-04
Loss = 6.8546e-01, PNorm = 38.2948, GNorm = 2.9737, lr_0 = 2.9163e-04
Loss = 6.1201e-01, PNorm = 38.2975, GNorm = 7.2411, lr_0 = 2.9538e-04
Loss = 6.5608e-01, PNorm = 38.3002, GNorm = 12.5195, lr_0 = 2.9913e-04
Loss = 6.3333e-01, PNorm = 38.3034, GNorm = 5.0219, lr_0 = 3.0288e-04
Loss = 6.4223e-01, PNorm = 38.3088, GNorm = 1.5132, lr_0 = 3.0662e-04
Loss = 6.1882e-01, PNorm = 38.3126, GNorm = 5.6569, lr_0 = 3.1037e-04
Loss = 6.1875e-01, PNorm = 38.3164, GNorm = 9.8751, lr_0 = 3.1413e-04
Loss = 7.4006e-01, PNorm = 38.3222, GNorm = 3.4665, lr_0 = 3.1788e-04
Loss = 6.8671e-01, PNorm = 38.3247, GNorm = 3.9254, lr_0 = 3.2163e-04
Loss = 6.8655e-01, PNorm = 38.3288, GNorm = 3.6119, lr_0 = 3.2538e-04
Loss = 7.6877e-01, PNorm = 38.3319, GNorm = 5.0887, lr_0 = 3.2912e-04
Loss = 6.5162e-01, PNorm = 38.3355, GNorm = 2.1700, lr_0 = 3.3288e-04
Loss = 7.8044e-01, PNorm = 38.3410, GNorm = 1.9546, lr_0 = 3.3663e-04
Loss = 6.3516e-01, PNorm = 38.3464, GNorm = 5.1416, lr_0 = 3.4038e-04
Loss = 6.1926e-01, PNorm = 38.3516, GNorm = 4.2469, lr_0 = 3.4413e-04
Loss = 7.0001e-01, PNorm = 38.3542, GNorm = 4.3655, lr_0 = 3.4787e-04
Loss = 7.0984e-01, PNorm = 38.3592, GNorm = 1.5752, lr_0 = 3.5162e-04
Loss = 6.4476e-01, PNorm = 38.3628, GNorm = 3.5913, lr_0 = 3.5538e-04
Loss = 6.1480e-01, PNorm = 38.3700, GNorm = 1.7286, lr_0 = 3.5913e-04
Loss = 6.2937e-01, PNorm = 38.3746, GNorm = 3.8199, lr_0 = 3.6288e-04
Loss = 6.8347e-01, PNorm = 38.3804, GNorm = 2.1885, lr_0 = 3.6662e-04
Loss = 6.2424e-01, PNorm = 38.3877, GNorm = 3.0694, lr_0 = 3.7037e-04
Loss = 7.5642e-01, PNorm = 38.3925, GNorm = 5.4467, lr_0 = 3.7413e-04
Loss = 6.3750e-01, PNorm = 38.3986, GNorm = 2.9302, lr_0 = 3.7788e-04
Loss = 8.0126e-01, PNorm = 38.4026, GNorm = 5.6671, lr_0 = 3.8163e-04
Loss = 6.2079e-01, PNorm = 38.4095, GNorm = 1.8455, lr_0 = 3.8537e-04
Loss = 6.4317e-01, PNorm = 38.4174, GNorm = 3.2687, lr_0 = 3.8912e-04
Loss = 6.6896e-01, PNorm = 38.4239, GNorm = 1.6133, lr_0 = 3.9287e-04
Loss = 5.8987e-01, PNorm = 38.4298, GNorm = 2.4684, lr_0 = 3.9663e-04
Loss = 6.3264e-01, PNorm = 38.4359, GNorm = 1.5561, lr_0 = 4.0038e-04
Loss = 6.0648e-01, PNorm = 38.4399, GNorm = 3.0702, lr_0 = 4.0413e-04
Loss = 6.0575e-01, PNorm = 38.4429, GNorm = 1.7914, lr_0 = 4.0787e-04
Loss = 7.1157e-01, PNorm = 38.4470, GNorm = 3.6465, lr_0 = 4.1162e-04
Loss = 7.1618e-01, PNorm = 38.4531, GNorm = 1.2652, lr_0 = 4.1537e-04
Loss = 6.1338e-01, PNorm = 38.4597, GNorm = 2.3452, lr_0 = 4.1913e-04
Loss = 7.2850e-01, PNorm = 38.4674, GNorm = 1.6925, lr_0 = 4.2288e-04
Loss = 6.8496e-01, PNorm = 38.4748, GNorm = 7.0743, lr_0 = 4.2662e-04
Loss = 6.2757e-01, PNorm = 38.4816, GNorm = 2.5853, lr_0 = 4.3037e-04
Loss = 7.1496e-01, PNorm = 38.4872, GNorm = 1.2040, lr_0 = 4.3412e-04
Loss = 6.3118e-01, PNorm = 38.4966, GNorm = 1.4006, lr_0 = 4.3788e-04
Loss = 6.3318e-01, PNorm = 38.5033, GNorm = 2.1199, lr_0 = 4.4163e-04
Loss = 6.5848e-01, PNorm = 38.5122, GNorm = 6.0289, lr_0 = 4.4538e-04
Loss = 6.7642e-01, PNorm = 38.5212, GNorm = 1.9617, lr_0 = 4.4912e-04
Loss = 7.0096e-01, PNorm = 38.5258, GNorm = 7.8944, lr_0 = 4.5287e-04
Loss = 6.9719e-01, PNorm = 38.5333, GNorm = 5.3388, lr_0 = 4.5662e-04
Loss = 6.7185e-01, PNorm = 38.5399, GNorm = 2.3818, lr_0 = 4.6038e-04
Loss = 6.4616e-01, PNorm = 38.5487, GNorm = 4.3081, lr_0 = 4.6413e-04
Loss = 5.6129e-01, PNorm = 38.5537, GNorm = 1.2524, lr_0 = 4.6787e-04
Loss = 5.9788e-01, PNorm = 38.5609, GNorm = 1.9743, lr_0 = 4.7162e-04
Loss = 6.0987e-01, PNorm = 38.5663, GNorm = 7.4813, lr_0 = 4.7537e-04
Loss = 5.6935e-01, PNorm = 38.5737, GNorm = 1.2511, lr_0 = 4.7913e-04
Loss = 5.7708e-01, PNorm = 38.5846, GNorm = 3.2293, lr_0 = 4.8288e-04
Loss = 6.0507e-01, PNorm = 38.5899, GNorm = 5.3374, lr_0 = 4.8663e-04
Loss = 6.6722e-01, PNorm = 38.5946, GNorm = 4.8405, lr_0 = 4.9038e-04
Loss = 6.4797e-01, PNorm = 38.6041, GNorm = 1.5966, lr_0 = 4.9412e-04
Loss = 6.5366e-01, PNorm = 38.6138, GNorm = 1.6939, lr_0 = 4.9788e-04
Loss = 7.3111e-01, PNorm = 38.6206, GNorm = 6.2335, lr_0 = 5.0163e-04
Loss = 5.9964e-01, PNorm = 38.6277, GNorm = 2.2218, lr_0 = 5.0538e-04
Loss = 6.1049e-01, PNorm = 38.6334, GNorm = 4.3330, lr_0 = 5.0913e-04
Loss = 6.7513e-01, PNorm = 38.6402, GNorm = 3.2527, lr_0 = 5.1287e-04
Loss = 6.2892e-01, PNorm = 38.6525, GNorm = 9.2569, lr_0 = 5.1663e-04
Loss = 5.8684e-01, PNorm = 38.6517, GNorm = 1.4228, lr_0 = 5.2038e-04
Loss = 6.9165e-01, PNorm = 38.6603, GNorm = 4.9355, lr_0 = 5.2413e-04
Loss = 7.0100e-01, PNorm = 38.6701, GNorm = 2.4720, lr_0 = 5.2788e-04
Loss = 5.8136e-01, PNorm = 38.6814, GNorm = 1.2374, lr_0 = 5.3162e-04
Loss = 5.9983e-01, PNorm = 38.6876, GNorm = 1.1660, lr_0 = 5.3538e-04
Loss = 5.4810e-01, PNorm = 38.6978, GNorm = 3.2646, lr_0 = 5.3912e-04
Loss = 6.5216e-01, PNorm = 38.7045, GNorm = 2.2530, lr_0 = 5.4288e-04
Loss = 6.1253e-01, PNorm = 38.7106, GNorm = 2.5130, lr_0 = 5.4663e-04
Loss = 6.3229e-01, PNorm = 38.7216, GNorm = 2.0576, lr_0 = 5.5038e-04
Validation mae = 0.141939
Epoch 1
Loss = 6.2268e-01, PNorm = 38.7372, GNorm = 3.4537, lr_0 = 5.5413e-04
Loss = 6.7693e-01, PNorm = 38.7485, GNorm = 3.7118, lr_0 = 5.5787e-04
Loss = 6.5254e-01, PNorm = 38.7583, GNorm = 2.3578, lr_0 = 5.6163e-04
Loss = 5.9563e-01, PNorm = 38.7695, GNorm = 12.2425, lr_0 = 5.6538e-04
Loss = 6.4482e-01, PNorm = 38.7802, GNorm = 1.7941, lr_0 = 5.6913e-04
Loss = 7.1171e-01, PNorm = 38.7967, GNorm = 1.5740, lr_0 = 5.7288e-04
Loss = 6.7966e-01, PNorm = 38.7971, GNorm = 2.5568, lr_0 = 5.7662e-04
Loss = 6.4240e-01, PNorm = 38.8095, GNorm = 3.1232, lr_0 = 5.8038e-04
Loss = 6.1044e-01, PNorm = 38.8217, GNorm = 1.3752, lr_0 = 5.8413e-04
Loss = 7.0085e-01, PNorm = 38.8263, GNorm = 1.6812, lr_0 = 5.8788e-04
Loss = 5.9824e-01, PNorm = 38.8363, GNorm = 5.0670, lr_0 = 5.9163e-04
Loss = 5.9705e-01, PNorm = 38.8475, GNorm = 1.6296, lr_0 = 5.9538e-04
Loss = 6.7803e-01, PNorm = 38.8625, GNorm = 2.1927, lr_0 = 5.9913e-04
Loss = 5.1089e-01, PNorm = 38.8717, GNorm = 0.8447, lr_0 = 6.0288e-04
Loss = 6.4988e-01, PNorm = 38.8822, GNorm = 1.5864, lr_0 = 6.0663e-04
Loss = 5.8563e-01, PNorm = 38.8939, GNorm = 3.4218, lr_0 = 6.1038e-04
Loss = 5.7246e-01, PNorm = 38.9009, GNorm = 7.2998, lr_0 = 6.1413e-04
Loss = 5.6730e-01, PNorm = 38.9093, GNorm = 3.6156, lr_0 = 6.1788e-04
Loss = 5.7593e-01, PNorm = 38.9181, GNorm = 1.5444, lr_0 = 6.2163e-04
Loss = 6.1581e-01, PNorm = 38.9290, GNorm = 6.9541, lr_0 = 6.2538e-04
Loss = 6.3751e-01, PNorm = 38.9392, GNorm = 1.7805, lr_0 = 6.2913e-04
Loss = 6.9925e-01, PNorm = 38.9536, GNorm = 2.6741, lr_0 = 6.3288e-04
Loss = 5.8660e-01, PNorm = 38.9701, GNorm = 1.8973, lr_0 = 6.3663e-04
Loss = 6.2514e-01, PNorm = 38.9862, GNorm = 3.8583, lr_0 = 6.4038e-04
Loss = 6.3042e-01, PNorm = 39.0040, GNorm = 4.3468, lr_0 = 6.4413e-04
Loss = 6.3994e-01, PNorm = 39.0182, GNorm = 4.4547, lr_0 = 6.4788e-04
Loss = 5.5923e-01, PNorm = 39.0246, GNorm = 1.2079, lr_0 = 6.5163e-04
Loss = 5.8415e-01, PNorm = 39.0336, GNorm = 5.0821, lr_0 = 6.5538e-04
Loss = 6.3863e-01, PNorm = 39.0471, GNorm = 2.4856, lr_0 = 6.5913e-04
Loss = 6.3181e-01, PNorm = 39.0570, GNorm = 3.8103, lr_0 = 6.6288e-04
Loss = 5.2903e-01, PNorm = 39.0654, GNorm = 3.7645, lr_0 = 6.6663e-04
Loss = 6.0720e-01, PNorm = 39.0719, GNorm = 1.1411, lr_0 = 6.7038e-04
Loss = 6.2344e-01, PNorm = 39.0838, GNorm = 7.5239, lr_0 = 6.7413e-04
Loss = 6.7729e-01, PNorm = 39.0967, GNorm = 2.1402, lr_0 = 6.7788e-04
Loss = 6.4447e-01, PNorm = 39.1059, GNorm = 3.4185, lr_0 = 6.8163e-04
Loss = 5.9518e-01, PNorm = 39.1197, GNorm = 1.5750, lr_0 = 6.8538e-04
Loss = 5.8026e-01, PNorm = 39.1304, GNorm = 1.3745, lr_0 = 6.8913e-04
Loss = 5.7433e-01, PNorm = 39.1473, GNorm = 2.2818, lr_0 = 6.9288e-04
Loss = 6.7073e-01, PNorm = 39.1588, GNorm = 4.1823, lr_0 = 6.9663e-04
Loss = 5.6957e-01, PNorm = 39.1725, GNorm = 4.7571, lr_0 = 7.0038e-04
Loss = 5.6621e-01, PNorm = 39.1956, GNorm = 2.0728, lr_0 = 7.0413e-04
Loss = 5.5364e-01, PNorm = 39.2121, GNorm = 3.4834, lr_0 = 7.0788e-04
Loss = 6.2791e-01, PNorm = 39.2217, GNorm = 1.2541, lr_0 = 7.1163e-04
Loss = 5.9328e-01, PNorm = 39.2330, GNorm = 3.8148, lr_0 = 7.1538e-04
Loss = 5.8491e-01, PNorm = 39.2389, GNorm = 1.3588, lr_0 = 7.1913e-04
Loss = 5.5150e-01, PNorm = 39.2551, GNorm = 2.4592, lr_0 = 7.2288e-04
Loss = 5.8131e-01, PNorm = 39.2636, GNorm = 6.1860, lr_0 = 7.2663e-04
Loss = 6.0514e-01, PNorm = 39.2797, GNorm = 2.3264, lr_0 = 7.3038e-04
Loss = 6.4382e-01, PNorm = 39.2928, GNorm = 2.6555, lr_0 = 7.3413e-04
Loss = 5.1834e-01, PNorm = 39.3054, GNorm = 1.3788, lr_0 = 7.3788e-04
Loss = 5.2898e-01, PNorm = 39.3169, GNorm = 4.6566, lr_0 = 7.4163e-04
Loss = 6.4453e-01, PNorm = 39.3300, GNorm = 1.3883, lr_0 = 7.4538e-04
Loss = 5.5426e-01, PNorm = 39.3417, GNorm = 2.2327, lr_0 = 7.4913e-04
Loss = 5.5462e-01, PNorm = 39.3597, GNorm = 1.7789, lr_0 = 7.5288e-04
Loss = 5.7281e-01, PNorm = 39.3705, GNorm = 1.2360, lr_0 = 7.5663e-04
Loss = 5.7006e-01, PNorm = 39.3822, GNorm = 5.4305, lr_0 = 7.6038e-04
Loss = 5.9021e-01, PNorm = 39.3941, GNorm = 3.0811, lr_0 = 7.6413e-04
Loss = 5.4530e-01, PNorm = 39.4097, GNorm = 4.8508, lr_0 = 7.6788e-04
Loss = 5.8608e-01, PNorm = 39.4257, GNorm = 2.3963, lr_0 = 7.7163e-04
Loss = 5.7567e-01, PNorm = 39.4439, GNorm = 1.4698, lr_0 = 7.7538e-04
Loss = 6.0660e-01, PNorm = 39.4563, GNorm = 4.5873, lr_0 = 7.7913e-04
Loss = 6.2558e-01, PNorm = 39.4742, GNorm = 3.7201, lr_0 = 7.8288e-04
Loss = 5.9967e-01, PNorm = 39.4901, GNorm = 4.6631, lr_0 = 7.8663e-04
Loss = 6.4139e-01, PNorm = 39.4981, GNorm = 3.0340, lr_0 = 7.9038e-04
Loss = 5.4334e-01, PNorm = 39.5119, GNorm = 2.1068, lr_0 = 7.9413e-04
Loss = 6.1384e-01, PNorm = 39.5246, GNorm = 2.2088, lr_0 = 7.9788e-04
Loss = 6.2421e-01, PNorm = 39.5417, GNorm = 1.4513, lr_0 = 8.0163e-04
Loss = 6.0855e-01, PNorm = 39.5571, GNorm = 3.0463, lr_0 = 8.0538e-04
Loss = 5.9708e-01, PNorm = 39.5755, GNorm = 1.3096, lr_0 = 8.0913e-04
Loss = 5.6332e-01, PNorm = 39.5895, GNorm = 1.1627, lr_0 = 8.1288e-04
Loss = 5.9529e-01, PNorm = 39.6026, GNorm = 2.3141, lr_0 = 8.1663e-04
Loss = 6.3422e-01, PNorm = 39.6138, GNorm = 4.9928, lr_0 = 8.2038e-04
Loss = 5.5155e-01, PNorm = 39.6341, GNorm = 4.7910, lr_0 = 8.2413e-04
Loss = 5.1395e-01, PNorm = 39.6516, GNorm = 3.4384, lr_0 = 8.2788e-04
Loss = 5.8580e-01, PNorm = 39.6662, GNorm = 2.5988, lr_0 = 8.3163e-04
Loss = 5.1977e-01, PNorm = 39.6821, GNorm = 2.6125, lr_0 = 8.3538e-04
Loss = 6.2065e-01, PNorm = 39.6971, GNorm = 1.7833, lr_0 = 8.3913e-04
Loss = 5.9040e-01, PNorm = 39.7120, GNorm = 3.4211, lr_0 = 8.4288e-04
Loss = 6.0878e-01, PNorm = 39.7206, GNorm = 1.3993, lr_0 = 8.4663e-04
Loss = 5.6978e-01, PNorm = 39.7375, GNorm = 4.4404, lr_0 = 8.5038e-04
Loss = 5.0041e-01, PNorm = 39.7557, GNorm = 2.6124, lr_0 = 8.5413e-04
Loss = 4.7262e-01, PNorm = 39.7701, GNorm = 3.9320, lr_0 = 8.5788e-04
Loss = 5.7536e-01, PNorm = 39.7907, GNorm = 2.8790, lr_0 = 8.6163e-04
Loss = 4.8538e-01, PNorm = 39.8040, GNorm = 1.1514, lr_0 = 8.6538e-04
Loss = 5.8605e-01, PNorm = 39.8227, GNorm = 1.7727, lr_0 = 8.6913e-04
Loss = 5.9045e-01, PNorm = 39.8406, GNorm = 2.1211, lr_0 = 8.7288e-04
Loss = 6.2306e-01, PNorm = 39.8694, GNorm = 3.8082, lr_0 = 8.7663e-04
Loss = 5.8044e-01, PNorm = 39.8811, GNorm = 2.7184, lr_0 = 8.8038e-04
Loss = 6.4633e-01, PNorm = 39.8930, GNorm = 4.2286, lr_0 = 8.8413e-04
Loss = 5.7904e-01, PNorm = 39.9169, GNorm = 6.0959, lr_0 = 8.8788e-04
Loss = 5.6410e-01, PNorm = 39.9392, GNorm = 1.7186, lr_0 = 8.9163e-04
Loss = 6.1700e-01, PNorm = 39.9626, GNorm = 3.6322, lr_0 = 8.9538e-04
Loss = 5.7255e-01, PNorm = 39.9684, GNorm = 1.7600, lr_0 = 8.9913e-04
Loss = 5.4279e-01, PNorm = 39.9919, GNorm = 1.6649, lr_0 = 9.0288e-04
Loss = 6.1267e-01, PNorm = 40.0080, GNorm = 1.2594, lr_0 = 9.0663e-04
Loss = 5.4863e-01, PNorm = 40.0273, GNorm = 2.7748, lr_0 = 9.1038e-04
Loss = 5.7825e-01, PNorm = 40.0527, GNorm = 1.4753, lr_0 = 9.1413e-04
Loss = 5.4473e-01, PNorm = 40.0749, GNorm = 3.3547, lr_0 = 9.1788e-04
Loss = 5.6687e-01, PNorm = 40.0970, GNorm = 1.7662, lr_0 = 9.2163e-04
Loss = 5.2853e-01, PNorm = 40.1185, GNorm = 1.5015, lr_0 = 9.2538e-04
Loss = 5.5187e-01, PNorm = 40.1373, GNorm = 1.1717, lr_0 = 9.2913e-04
Loss = 5.8223e-01, PNorm = 40.1589, GNorm = 1.6050, lr_0 = 9.3288e-04
Loss = 5.2407e-01, PNorm = 40.1789, GNorm = 2.7487, lr_0 = 9.3663e-04
Loss = 5.9636e-01, PNorm = 40.1937, GNorm = 1.7535, lr_0 = 9.4038e-04
Loss = 5.6761e-01, PNorm = 40.2143, GNorm = 1.2625, lr_0 = 9.4413e-04
Loss = 5.4456e-01, PNorm = 40.2375, GNorm = 1.3688, lr_0 = 9.4788e-04
Loss = 6.5929e-01, PNorm = 40.2570, GNorm = 4.4730, lr_0 = 9.5163e-04
Loss = 6.5678e-01, PNorm = 40.2803, GNorm = 3.9402, lr_0 = 9.5538e-04
Loss = 5.7712e-01, PNorm = 40.2980, GNorm = 4.7225, lr_0 = 9.5913e-04
Loss = 5.9591e-01, PNorm = 40.3273, GNorm = 3.1490, lr_0 = 9.6288e-04
Loss = 7.1025e-01, PNorm = 40.3413, GNorm = 1.8923, lr_0 = 9.6663e-04
Loss = 5.7510e-01, PNorm = 40.3665, GNorm = 4.1121, lr_0 = 9.7038e-04
Loss = 4.9399e-01, PNorm = 40.3852, GNorm = 1.5042, lr_0 = 9.7413e-04
Loss = 5.5929e-01, PNorm = 40.4088, GNorm = 3.3606, lr_0 = 9.7788e-04
Loss = 5.7778e-01, PNorm = 40.4249, GNorm = 2.4780, lr_0 = 9.8163e-04
Loss = 6.3502e-01, PNorm = 40.4398, GNorm = 1.4060, lr_0 = 9.8537e-04
Loss = 6.1839e-01, PNorm = 40.4690, GNorm = 0.8656, lr_0 = 9.8912e-04
Loss = 6.5471e-01, PNorm = 40.4867, GNorm = 5.0685, lr_0 = 9.9288e-04
Loss = 5.9139e-01, PNorm = 40.5010, GNorm = 0.9600, lr_0 = 9.9663e-04
Loss = 5.8093e-01, PNorm = 40.5273, GNorm = 3.4483, lr_0 = 9.9993e-04
Validation mae = 0.137254
Epoch 2
Loss = 4.8860e-01, PNorm = 40.5439, GNorm = 1.2475, lr_0 = 9.9925e-04
Loss = 5.7282e-01, PNorm = 40.5688, GNorm = 2.9989, lr_0 = 9.9856e-04
Loss = 5.7797e-01, PNorm = 40.5908, GNorm = 5.9975, lr_0 = 9.9788e-04
Loss = 6.0054e-01, PNorm = 40.6130, GNorm = 1.3116, lr_0 = 9.9719e-04
Loss = 6.5312e-01, PNorm = 40.6348, GNorm = 1.8088, lr_0 = 9.9651e-04
Loss = 5.4065e-01, PNorm = 40.6521, GNorm = 2.2074, lr_0 = 9.9583e-04
Loss = 5.4804e-01, PNorm = 40.6731, GNorm = 3.8496, lr_0 = 9.9515e-04
Loss = 5.7008e-01, PNorm = 40.7007, GNorm = 1.6821, lr_0 = 9.9446e-04
Loss = 5.7959e-01, PNorm = 40.7276, GNorm = 2.6684, lr_0 = 9.9378e-04
Loss = 5.0544e-01, PNorm = 40.7428, GNorm = 0.8274, lr_0 = 9.9310e-04
Loss = 5.3476e-01, PNorm = 40.7620, GNorm = 2.6779, lr_0 = 9.9242e-04
Loss = 5.4113e-01, PNorm = 40.7710, GNorm = 1.2257, lr_0 = 9.9174e-04
Loss = 5.4281e-01, PNorm = 40.7843, GNorm = 3.5156, lr_0 = 9.9106e-04
Loss = 5.8442e-01, PNorm = 40.7955, GNorm = 1.2023, lr_0 = 9.9038e-04
Loss = 5.8098e-01, PNorm = 40.8098, GNorm = 1.0883, lr_0 = 9.8971e-04
Loss = 5.5041e-01, PNorm = 40.8303, GNorm = 1.2413, lr_0 = 9.8903e-04
Loss = 5.6852e-01, PNorm = 40.8455, GNorm = 3.0285, lr_0 = 9.8835e-04
Loss = 6.1235e-01, PNorm = 40.8576, GNorm = 0.8289, lr_0 = 9.8767e-04
Loss = 4.8729e-01, PNorm = 40.8821, GNorm = 2.6176, lr_0 = 9.8700e-04
Loss = 4.7714e-01, PNorm = 40.8981, GNorm = 1.6068, lr_0 = 9.8632e-04
Loss = 6.3408e-01, PNorm = 40.9186, GNorm = 1.6569, lr_0 = 9.8564e-04
Loss = 5.0598e-01, PNorm = 40.9407, GNorm = 1.8724, lr_0 = 9.8497e-04
Loss = 5.6748e-01, PNorm = 40.9656, GNorm = 1.2864, lr_0 = 9.8429e-04
Loss = 5.2796e-01, PNorm = 40.9898, GNorm = 1.5041, lr_0 = 9.8362e-04
Loss = 6.6303e-01, PNorm = 41.0023, GNorm = 3.7863, lr_0 = 9.8295e-04
Loss = 5.1588e-01, PNorm = 41.0187, GNorm = 1.4875, lr_0 = 9.8227e-04
Loss = 4.9217e-01, PNorm = 41.0409, GNorm = 1.5461, lr_0 = 9.8160e-04
Loss = 5.6997e-01, PNorm = 41.0541, GNorm = 2.5815, lr_0 = 9.8093e-04
Loss = 6.0288e-01, PNorm = 41.0736, GNorm = 5.0832, lr_0 = 9.8026e-04
Loss = 5.5860e-01, PNorm = 41.0939, GNorm = 2.9148, lr_0 = 9.7958e-04
Loss = 6.1327e-01, PNorm = 41.1297, GNorm = 1.9624, lr_0 = 9.7891e-04
Loss = 5.7208e-01, PNorm = 41.1565, GNorm = 1.7660, lr_0 = 9.7824e-04
Loss = 5.2187e-01, PNorm = 41.1722, GNorm = 1.2101, lr_0 = 9.7757e-04
Loss = 5.9337e-01, PNorm = 41.1960, GNorm = 1.9443, lr_0 = 9.7690e-04
Loss = 5.6043e-01, PNorm = 41.2217, GNorm = 1.0626, lr_0 = 9.7623e-04
Loss = 5.3137e-01, PNorm = 41.2411, GNorm = 2.9433, lr_0 = 9.7556e-04
Loss = 5.8071e-01, PNorm = 41.2577, GNorm = 2.2943, lr_0 = 9.7490e-04
Loss = 5.4588e-01, PNorm = 41.2750, GNorm = 1.3274, lr_0 = 9.7423e-04
Loss = 5.4929e-01, PNorm = 41.2914, GNorm = 2.3968, lr_0 = 9.7356e-04
Loss = 5.8678e-01, PNorm = 41.3134, GNorm = 1.0876, lr_0 = 9.7289e-04
Loss = 5.7752e-01, PNorm = 41.3446, GNorm = 1.9631, lr_0 = 9.7223e-04
Loss = 5.9449e-01, PNorm = 41.3525, GNorm = 1.3618, lr_0 = 9.7156e-04
Loss = 5.3108e-01, PNorm = 41.3750, GNorm = 1.7151, lr_0 = 9.7090e-04
Loss = 5.9327e-01, PNorm = 41.3934, GNorm = 1.5440, lr_0 = 9.7023e-04
Loss = 5.9233e-01, PNorm = 41.4113, GNorm = 2.0652, lr_0 = 9.6957e-04
Loss = 5.4584e-01, PNorm = 41.4466, GNorm = 3.1704, lr_0 = 9.6890e-04
Loss = 4.4556e-01, PNorm = 41.4603, GNorm = 3.3910, lr_0 = 9.6824e-04
Loss = 6.2385e-01, PNorm = 41.4691, GNorm = 3.4504, lr_0 = 9.6757e-04
Loss = 5.6422e-01, PNorm = 41.4897, GNorm = 2.1991, lr_0 = 9.6691e-04
Loss = 6.0912e-01, PNorm = 41.5130, GNorm = 0.8691, lr_0 = 9.6625e-04
Loss = 4.8201e-01, PNorm = 41.5370, GNorm = 1.3742, lr_0 = 9.6559e-04
Loss = 5.3170e-01, PNorm = 41.5497, GNorm = 2.0422, lr_0 = 9.6493e-04
Loss = 4.9971e-01, PNorm = 41.5655, GNorm = 0.9493, lr_0 = 9.6427e-04
Loss = 5.6373e-01, PNorm = 41.5919, GNorm = 0.9257, lr_0 = 9.6360e-04
Loss = 5.7096e-01, PNorm = 41.6157, GNorm = 1.0563, lr_0 = 9.6294e-04
Loss = 5.4008e-01, PNorm = 41.6291, GNorm = 2.7046, lr_0 = 9.6228e-04
Loss = 5.5005e-01, PNorm = 41.6462, GNorm = 1.3219, lr_0 = 9.6163e-04
Loss = 5.1893e-01, PNorm = 41.6644, GNorm = 1.2656, lr_0 = 9.6097e-04
Loss = 5.0488e-01, PNorm = 41.6901, GNorm = 2.0398, lr_0 = 9.6031e-04
Loss = 5.3831e-01, PNorm = 41.6990, GNorm = 2.4740, lr_0 = 9.5965e-04
Loss = 5.0532e-01, PNorm = 41.7227, GNorm = 0.8870, lr_0 = 9.5899e-04
Loss = 5.2864e-01, PNorm = 41.7396, GNorm = 2.3685, lr_0 = 9.5834e-04
Loss = 6.0414e-01, PNorm = 41.7560, GNorm = 2.9915, lr_0 = 9.5768e-04
Loss = 5.0815e-01, PNorm = 41.7726, GNorm = 1.6178, lr_0 = 9.5702e-04
Loss = 5.8486e-01, PNorm = 41.7946, GNorm = 1.1889, lr_0 = 9.5637e-04
Loss = 5.8127e-01, PNorm = 41.8189, GNorm = 3.0502, lr_0 = 9.5571e-04
Loss = 5.8700e-01, PNorm = 41.8420, GNorm = 2.6991, lr_0 = 9.5506e-04
Loss = 5.4521e-01, PNorm = 41.8643, GNorm = 1.0156, lr_0 = 9.5440e-04
Loss = 6.1120e-01, PNorm = 41.8896, GNorm = 3.2900, lr_0 = 9.5375e-04
Loss = 5.6103e-01, PNorm = 41.9058, GNorm = 1.3653, lr_0 = 9.5310e-04
Loss = 5.3566e-01, PNorm = 41.9186, GNorm = 2.3726, lr_0 = 9.5244e-04
Loss = 6.4727e-01, PNorm = 41.9384, GNorm = 2.7472, lr_0 = 9.5179e-04
Loss = 5.1269e-01, PNorm = 41.9613, GNorm = 0.9811, lr_0 = 9.5114e-04
Loss = 5.9126e-01, PNorm = 41.9715, GNorm = 3.4882, lr_0 = 9.5049e-04
Loss = 5.1988e-01, PNorm = 41.9846, GNorm = 3.0321, lr_0 = 9.4984e-04
Loss = 4.8534e-01, PNorm = 42.0029, GNorm = 1.1363, lr_0 = 9.4919e-04
Loss = 4.7426e-01, PNorm = 42.0194, GNorm = 1.6053, lr_0 = 9.4854e-04
Loss = 5.1173e-01, PNorm = 42.0431, GNorm = 0.9500, lr_0 = 9.4789e-04
Loss = 5.3024e-01, PNorm = 42.0661, GNorm = 1.6157, lr_0 = 9.4724e-04
Loss = 5.3118e-01, PNorm = 42.0813, GNorm = 1.0912, lr_0 = 9.4659e-04
Loss = 5.2837e-01, PNorm = 42.1015, GNorm = 3.7649, lr_0 = 9.4594e-04
Loss = 5.1675e-01, PNorm = 42.1163, GNorm = 1.7216, lr_0 = 9.4529e-04
Loss = 5.0830e-01, PNorm = 42.1420, GNorm = 4.1136, lr_0 = 9.4464e-04
Loss = 5.6275e-01, PNorm = 42.1603, GNorm = 1.3865, lr_0 = 9.4400e-04
Loss = 5.6342e-01, PNorm = 42.1770, GNorm = 2.0675, lr_0 = 9.4335e-04
Loss = 5.5070e-01, PNorm = 42.2061, GNorm = 1.0976, lr_0 = 9.4270e-04
Loss = 4.6497e-01, PNorm = 42.2287, GNorm = 0.9894, lr_0 = 9.4206e-04
Loss = 5.5669e-01, PNorm = 42.2408, GNorm = 1.2589, lr_0 = 9.4141e-04
Loss = 5.3281e-01, PNorm = 42.2531, GNorm = 1.3835, lr_0 = 9.4077e-04
Loss = 5.2790e-01, PNorm = 42.2739, GNorm = 3.2710, lr_0 = 9.4012e-04
Loss = 4.7707e-01, PNorm = 42.2902, GNorm = 1.1095, lr_0 = 9.3948e-04
Loss = 5.6158e-01, PNorm = 42.3061, GNorm = 1.3923, lr_0 = 9.3884e-04
Loss = 4.9782e-01, PNorm = 42.3175, GNorm = 1.3535, lr_0 = 9.3819e-04
Loss = 5.8368e-01, PNorm = 42.3311, GNorm = 1.7105, lr_0 = 9.3755e-04
Loss = 5.1952e-01, PNorm = 42.3547, GNorm = 2.2264, lr_0 = 9.3691e-04
Loss = 4.7929e-01, PNorm = 42.3707, GNorm = 1.7758, lr_0 = 9.3627e-04
Loss = 5.4002e-01, PNorm = 42.3821, GNorm = 1.1250, lr_0 = 9.3562e-04
Loss = 5.7745e-01, PNorm = 42.3981, GNorm = 3.6947, lr_0 = 9.3498e-04
Loss = 5.6772e-01, PNorm = 42.4194, GNorm = 1.8443, lr_0 = 9.3434e-04
Loss = 5.6169e-01, PNorm = 42.4461, GNorm = 1.8201, lr_0 = 9.3370e-04
Loss = 4.9236e-01, PNorm = 42.4667, GNorm = 3.2845, lr_0 = 9.3306e-04
Loss = 5.1723e-01, PNorm = 42.4828, GNorm = 1.5065, lr_0 = 9.3242e-04
Loss = 5.4032e-01, PNorm = 42.5064, GNorm = 1.1905, lr_0 = 9.3178e-04
Loss = 4.8377e-01, PNorm = 42.5244, GNorm = 1.3159, lr_0 = 9.3115e-04
Loss = 5.1361e-01, PNorm = 42.5423, GNorm = 2.9129, lr_0 = 9.3051e-04
Loss = 5.3790e-01, PNorm = 42.5577, GNorm = 1.4393, lr_0 = 9.2987e-04
Loss = 5.1076e-01, PNorm = 42.5739, GNorm = 1.2567, lr_0 = 9.2923e-04
Loss = 5.0115e-01, PNorm = 42.5913, GNorm = 1.1067, lr_0 = 9.2860e-04
Loss = 5.5132e-01, PNorm = 42.6129, GNorm = 1.2364, lr_0 = 9.2796e-04
Loss = 5.4305e-01, PNorm = 42.6343, GNorm = 2.3541, lr_0 = 9.2733e-04
Loss = 5.5142e-01, PNorm = 42.6510, GNorm = 1.3508, lr_0 = 9.2669e-04
Loss = 5.1311e-01, PNorm = 42.6709, GNorm = 2.2730, lr_0 = 9.2606e-04
Loss = 5.2339e-01, PNorm = 42.6899, GNorm = 1.3703, lr_0 = 9.2542e-04
Loss = 5.1724e-01, PNorm = 42.7198, GNorm = 1.0629, lr_0 = 9.2479e-04
Loss = 5.5704e-01, PNorm = 42.7484, GNorm = 1.4435, lr_0 = 9.2415e-04
Loss = 5.0347e-01, PNorm = 42.7690, GNorm = 1.4841, lr_0 = 9.2352e-04
Loss = 5.1300e-01, PNorm = 42.7962, GNorm = 0.9564, lr_0 = 9.2289e-04
Loss = 4.5367e-01, PNorm = 42.8132, GNorm = 1.1859, lr_0 = 9.2226e-04
Loss = 5.0846e-01, PNorm = 42.8274, GNorm = 2.4659, lr_0 = 9.2162e-04
Loss = 5.6687e-01, PNorm = 42.8443, GNorm = 2.4780, lr_0 = 9.2099e-04
Validation mae = 0.128865
Epoch 3
Loss = 4.6414e-01, PNorm = 42.8682, GNorm = 0.9468, lr_0 = 9.2036e-04
Loss = 4.7856e-01, PNorm = 42.8770, GNorm = 1.4502, lr_0 = 9.1973e-04
Loss = 5.5119e-01, PNorm = 42.8897, GNorm = 1.2037, lr_0 = 9.1910e-04
Loss = 4.7476e-01, PNorm = 42.9122, GNorm = 2.0928, lr_0 = 9.1847e-04
Loss = 5.1006e-01, PNorm = 42.9265, GNorm = 2.5860, lr_0 = 9.1784e-04
Loss = 4.8701e-01, PNorm = 42.9448, GNorm = 1.1018, lr_0 = 9.1721e-04
Loss = 4.9606e-01, PNorm = 42.9643, GNorm = 2.1563, lr_0 = 9.1658e-04
Loss = 4.8423e-01, PNorm = 42.9815, GNorm = 2.0136, lr_0 = 9.1596e-04
Loss = 4.9317e-01, PNorm = 42.9949, GNorm = 0.9344, lr_0 = 9.1533e-04
Loss = 4.9589e-01, PNorm = 43.0168, GNorm = 2.0970, lr_0 = 9.1470e-04
Loss = 5.4296e-01, PNorm = 43.0316, GNorm = 1.1827, lr_0 = 9.1408e-04
Loss = 5.3993e-01, PNorm = 43.0551, GNorm = 3.9607, lr_0 = 9.1345e-04
Loss = 5.2044e-01, PNorm = 43.0784, GNorm = 1.4497, lr_0 = 9.1282e-04
Loss = 5.3159e-01, PNorm = 43.1075, GNorm = 2.0904, lr_0 = 9.1220e-04
Loss = 4.5654e-01, PNorm = 43.1296, GNorm = 1.7295, lr_0 = 9.1157e-04
Loss = 4.9944e-01, PNorm = 43.1389, GNorm = 1.3092, lr_0 = 9.1095e-04
Loss = 4.3646e-01, PNorm = 43.1486, GNorm = 2.4784, lr_0 = 9.1032e-04
Loss = 4.6929e-01, PNorm = 43.1704, GNorm = 1.1340, lr_0 = 9.0970e-04
Loss = 5.4640e-01, PNorm = 43.2014, GNorm = 1.9229, lr_0 = 9.0908e-04
Loss = 4.5279e-01, PNorm = 43.2193, GNorm = 2.8910, lr_0 = 9.0846e-04
Loss = 5.4103e-01, PNorm = 43.2363, GNorm = 1.5564, lr_0 = 9.0783e-04
Loss = 5.1495e-01, PNorm = 43.2646, GNorm = 1.5479, lr_0 = 9.0721e-04
Loss = 5.6816e-01, PNorm = 43.2865, GNorm = 3.7525, lr_0 = 9.0659e-04
Loss = 4.6265e-01, PNorm = 43.3066, GNorm = 1.1177, lr_0 = 9.0597e-04
Loss = 5.0438e-01, PNorm = 43.3234, GNorm = 2.2880, lr_0 = 9.0535e-04
Loss = 5.2322e-01, PNorm = 43.3480, GNorm = 2.2561, lr_0 = 9.0473e-04
Loss = 5.1366e-01, PNorm = 43.3761, GNorm = 1.6001, lr_0 = 9.0411e-04
Loss = 5.1350e-01, PNorm = 43.3972, GNorm = 1.0661, lr_0 = 9.0349e-04
Loss = 5.3566e-01, PNorm = 43.4092, GNorm = 1.1641, lr_0 = 9.0287e-04
Loss = 6.0729e-01, PNorm = 43.4264, GNorm = 2.1555, lr_0 = 9.0225e-04
Loss = 4.8932e-01, PNorm = 43.4562, GNorm = 1.9967, lr_0 = 9.0163e-04
Loss = 5.0998e-01, PNorm = 43.4713, GNorm = 1.2860, lr_0 = 9.0102e-04
Loss = 6.0098e-01, PNorm = 43.4864, GNorm = 1.8941, lr_0 = 9.0040e-04
Loss = 4.8686e-01, PNorm = 43.5062, GNorm = 2.0513, lr_0 = 8.9978e-04
Loss = 4.7537e-01, PNorm = 43.5265, GNorm = 1.0004, lr_0 = 8.9916e-04
Loss = 4.3176e-01, PNorm = 43.5450, GNorm = 1.2425, lr_0 = 8.9855e-04
Loss = 5.6106e-01, PNorm = 43.5615, GNorm = 2.5173, lr_0 = 8.9793e-04
Loss = 5.1508e-01, PNorm = 43.5936, GNorm = 1.8540, lr_0 = 8.9732e-04
Loss = 5.0725e-01, PNorm = 43.6084, GNorm = 1.2465, lr_0 = 8.9670e-04
Loss = 5.7167e-01, PNorm = 43.6267, GNorm = 1.9483, lr_0 = 8.9609e-04
Loss = 5.9330e-01, PNorm = 43.6510, GNorm = 2.0223, lr_0 = 8.9548e-04
Loss = 4.5478e-01, PNorm = 43.6592, GNorm = 0.9487, lr_0 = 8.9486e-04
Loss = 5.8534e-01, PNorm = 43.6751, GNorm = 1.4778, lr_0 = 8.9425e-04
Loss = 4.6067e-01, PNorm = 43.6987, GNorm = 1.3341, lr_0 = 8.9364e-04
Loss = 5.1754e-01, PNorm = 43.7174, GNorm = 2.9497, lr_0 = 8.9302e-04
Loss = 5.1190e-01, PNorm = 43.7348, GNorm = 1.4586, lr_0 = 8.9241e-04
Loss = 4.9536e-01, PNorm = 43.7545, GNorm = 2.5173, lr_0 = 8.9180e-04
Loss = 5.3458e-01, PNorm = 43.7701, GNorm = 4.0089, lr_0 = 8.9119e-04
Loss = 4.8431e-01, PNorm = 43.7944, GNorm = 1.8514, lr_0 = 8.9058e-04
Loss = 5.2395e-01, PNorm = 43.8209, GNorm = 1.8505, lr_0 = 8.8997e-04
Loss = 4.9787e-01, PNorm = 43.8494, GNorm = 3.7420, lr_0 = 8.8936e-04
Loss = 5.0044e-01, PNorm = 43.8663, GNorm = 1.3206, lr_0 = 8.8875e-04
Loss = 5.2156e-01, PNorm = 43.8856, GNorm = 1.5166, lr_0 = 8.8814e-04
Loss = 5.6873e-01, PNorm = 43.9097, GNorm = 2.4992, lr_0 = 8.8753e-04
Loss = 5.2286e-01, PNorm = 43.9295, GNorm = 1.3570, lr_0 = 8.8693e-04
Loss = 5.1747e-01, PNorm = 43.9475, GNorm = 1.2576, lr_0 = 8.8632e-04
Loss = 5.1608e-01, PNorm = 43.9686, GNorm = 1.2071, lr_0 = 8.8571e-04
Loss = 5.5856e-01, PNorm = 43.9933, GNorm = 1.3550, lr_0 = 8.8510e-04
Loss = 4.3299e-01, PNorm = 44.0109, GNorm = 3.0614, lr_0 = 8.8450e-04
Loss = 6.8681e-01, PNorm = 44.0230, GNorm = 1.5016, lr_0 = 8.8389e-04
Loss = 5.5055e-01, PNorm = 44.0495, GNorm = 2.1756, lr_0 = 8.8329e-04
Loss = 5.7171e-01, PNorm = 44.0736, GNorm = 2.6710, lr_0 = 8.8268e-04
Loss = 4.9046e-01, PNorm = 44.0899, GNorm = 0.8916, lr_0 = 8.8208e-04
Loss = 5.3335e-01, PNorm = 44.1054, GNorm = 1.6335, lr_0 = 8.8147e-04
Loss = 5.1858e-01, PNorm = 44.1286, GNorm = 2.6177, lr_0 = 8.8087e-04
Loss = 4.8492e-01, PNorm = 44.1443, GNorm = 0.9926, lr_0 = 8.8026e-04
Loss = 4.6803e-01, PNorm = 44.1658, GNorm = 1.8493, lr_0 = 8.7966e-04
Loss = 4.9138e-01, PNorm = 44.1860, GNorm = 1.1391, lr_0 = 8.7906e-04
Loss = 5.2073e-01, PNorm = 44.1999, GNorm = 1.0074, lr_0 = 8.7846e-04
Loss = 4.6454e-01, PNorm = 44.2199, GNorm = 0.9147, lr_0 = 8.7785e-04
Loss = 4.8182e-01, PNorm = 44.2317, GNorm = 1.0934, lr_0 = 8.7725e-04
Loss = 5.2646e-01, PNorm = 44.2490, GNorm = 2.0786, lr_0 = 8.7665e-04
Loss = 5.0663e-01, PNorm = 44.2607, GNorm = 2.3135, lr_0 = 8.7605e-04
Loss = 5.1848e-01, PNorm = 44.2862, GNorm = 2.6604, lr_0 = 8.7545e-04
Loss = 5.2422e-01, PNorm = 44.3073, GNorm = 1.6999, lr_0 = 8.7485e-04
Loss = 5.0248e-01, PNorm = 44.3369, GNorm = 1.5915, lr_0 = 8.7425e-04
Loss = 5.1535e-01, PNorm = 44.3543, GNorm = 1.7924, lr_0 = 8.7365e-04
Loss = 5.1573e-01, PNorm = 44.3749, GNorm = 0.9523, lr_0 = 8.7306e-04
Loss = 5.2388e-01, PNorm = 44.3923, GNorm = 1.8777, lr_0 = 8.7246e-04
Loss = 5.2849e-01, PNorm = 44.4076, GNorm = 1.2091, lr_0 = 8.7186e-04
Loss = 4.8089e-01, PNorm = 44.4298, GNorm = 1.6211, lr_0 = 8.7126e-04
Loss = 4.7405e-01, PNorm = 44.4500, GNorm = 0.9436, lr_0 = 8.7067e-04
Loss = 5.0666e-01, PNorm = 44.4740, GNorm = 2.5419, lr_0 = 8.7007e-04
Loss = 4.6820e-01, PNorm = 44.4804, GNorm = 1.1585, lr_0 = 8.6947e-04
Loss = 4.9468e-01, PNorm = 44.5000, GNorm = 1.4248, lr_0 = 8.6888e-04
Loss = 4.4461e-01, PNorm = 44.5086, GNorm = 0.9598, lr_0 = 8.6828e-04
Loss = 5.5439e-01, PNorm = 44.5288, GNorm = 2.3407, lr_0 = 8.6769e-04
Loss = 5.4864e-01, PNorm = 44.5560, GNorm = 1.8212, lr_0 = 8.6709e-04
Loss = 4.8556e-01, PNorm = 44.5745, GNorm = 1.7245, lr_0 = 8.6650e-04
Loss = 5.2047e-01, PNorm = 44.5913, GNorm = 1.4820, lr_0 = 8.6590e-04
Loss = 5.3449e-01, PNorm = 44.6082, GNorm = 1.0974, lr_0 = 8.6531e-04
Loss = 4.3399e-01, PNorm = 44.6219, GNorm = 1.9664, lr_0 = 8.6472e-04
Loss = 5.0154e-01, PNorm = 44.6429, GNorm = 2.0929, lr_0 = 8.6413e-04
Loss = 4.8938e-01, PNorm = 44.6674, GNorm = 1.2245, lr_0 = 8.6353e-04
Loss = 5.3104e-01, PNorm = 44.6811, GNorm = 1.0237, lr_0 = 8.6294e-04
Loss = 4.7732e-01, PNorm = 44.6994, GNorm = 1.1276, lr_0 = 8.6235e-04
Loss = 4.7059e-01, PNorm = 44.7192, GNorm = 1.4722, lr_0 = 8.6176e-04
Loss = 4.3866e-01, PNorm = 44.7322, GNorm = 1.4256, lr_0 = 8.6117e-04
Loss = 4.8193e-01, PNorm = 44.7435, GNorm = 2.2724, lr_0 = 8.6058e-04
Loss = 4.9653e-01, PNorm = 44.7594, GNorm = 1.0770, lr_0 = 8.5999e-04
Loss = 5.0498e-01, PNorm = 44.7724, GNorm = 0.9392, lr_0 = 8.5940e-04
Loss = 4.9697e-01, PNorm = 44.7950, GNorm = 2.0249, lr_0 = 8.5881e-04
Loss = 5.2061e-01, PNorm = 44.8031, GNorm = 1.9464, lr_0 = 8.5823e-04
Loss = 5.2290e-01, PNorm = 44.8214, GNorm = 1.6686, lr_0 = 8.5764e-04
Loss = 5.0598e-01, PNorm = 44.8303, GNorm = 1.0101, lr_0 = 8.5705e-04
Loss = 5.2760e-01, PNorm = 44.8427, GNorm = 1.0762, lr_0 = 8.5646e-04
Loss = 4.7320e-01, PNorm = 44.8622, GNorm = 1.2992, lr_0 = 8.5588e-04
Loss = 5.2051e-01, PNorm = 44.8781, GNorm = 2.7310, lr_0 = 8.5529e-04
Loss = 5.4001e-01, PNorm = 44.8990, GNorm = 1.8729, lr_0 = 8.5470e-04
Loss = 5.2249e-01, PNorm = 44.9152, GNorm = 1.2307, lr_0 = 8.5412e-04
Loss = 4.7587e-01, PNorm = 44.9355, GNorm = 1.2038, lr_0 = 8.5353e-04
Loss = 4.5421e-01, PNorm = 44.9501, GNorm = 2.0486, lr_0 = 8.5295e-04
Loss = 4.7633e-01, PNorm = 44.9667, GNorm = 1.3504, lr_0 = 8.5236e-04
Loss = 5.5017e-01, PNorm = 44.9839, GNorm = 2.0394, lr_0 = 8.5178e-04
Loss = 5.3838e-01, PNorm = 44.9996, GNorm = 1.1702, lr_0 = 8.5120e-04
Loss = 4.8252e-01, PNorm = 45.0152, GNorm = 2.1099, lr_0 = 8.5061e-04
Loss = 4.7305e-01, PNorm = 45.0261, GNorm = 2.0099, lr_0 = 8.5003e-04
Loss = 5.9082e-01, PNorm = 45.0411, GNorm = 3.4655, lr_0 = 8.4945e-04
Loss = 5.6320e-01, PNorm = 45.0599, GNorm = 1.1247, lr_0 = 8.4887e-04
Loss = 4.6702e-01, PNorm = 45.0795, GNorm = 1.3969, lr_0 = 8.4828e-04
Validation mae = 0.123391
Epoch 4
Loss = 4.8557e-01, PNorm = 45.0941, GNorm = 1.2418, lr_0 = 8.4770e-04
Loss = 5.1841e-01, PNorm = 45.1182, GNorm = 1.8528, lr_0 = 8.4712e-04
Loss = 5.6143e-01, PNorm = 45.1343, GNorm = 3.2772, lr_0 = 8.4654e-04
Loss = 5.3694e-01, PNorm = 45.1518, GNorm = 1.3790, lr_0 = 8.4596e-04
Loss = 5.1466e-01, PNorm = 45.1747, GNorm = 2.5186, lr_0 = 8.4538e-04
Loss = 5.1400e-01, PNorm = 45.1871, GNorm = 1.7039, lr_0 = 8.4480e-04
Loss = 4.6227e-01, PNorm = 45.2167, GNorm = 1.0250, lr_0 = 8.4423e-04
Loss = 4.8238e-01, PNorm = 45.2424, GNorm = 1.6265, lr_0 = 8.4365e-04
Loss = 5.0277e-01, PNorm = 45.2525, GNorm = 1.8648, lr_0 = 8.4307e-04
Loss = 4.8727e-01, PNorm = 45.2728, GNorm = 1.0574, lr_0 = 8.4249e-04
Loss = 4.1762e-01, PNorm = 45.2938, GNorm = 0.9497, lr_0 = 8.4191e-04
Loss = 4.9455e-01, PNorm = 45.3060, GNorm = 1.4537, lr_0 = 8.4134e-04
Loss = 5.5760e-01, PNorm = 45.3261, GNorm = 3.0213, lr_0 = 8.4076e-04
Loss = 5.2339e-01, PNorm = 45.3477, GNorm = 2.6137, lr_0 = 8.4019e-04
Loss = 5.0508e-01, PNorm = 45.3742, GNorm = 1.0125, lr_0 = 8.3961e-04
Loss = 5.2860e-01, PNorm = 45.3960, GNorm = 1.7210, lr_0 = 8.3903e-04
Loss = 4.0612e-01, PNorm = 45.4179, GNorm = 0.9710, lr_0 = 8.3846e-04
Loss = 4.8351e-01, PNorm = 45.4353, GNorm = 1.5754, lr_0 = 8.3789e-04
Loss = 4.9725e-01, PNorm = 45.4587, GNorm = 1.4623, lr_0 = 8.3731e-04
Loss = 4.9294e-01, PNorm = 45.4767, GNorm = 1.3189, lr_0 = 8.3674e-04
Loss = 5.3414e-01, PNorm = 45.4914, GNorm = 1.9909, lr_0 = 8.3616e-04
Loss = 5.0874e-01, PNorm = 45.5008, GNorm = 2.0908, lr_0 = 8.3559e-04
Loss = 5.3230e-01, PNorm = 45.5269, GNorm = 3.8007, lr_0 = 8.3502e-04
Loss = 5.1810e-01, PNorm = 45.5527, GNorm = 1.9214, lr_0 = 8.3445e-04
Loss = 4.9365e-01, PNorm = 45.5805, GNorm = 1.1537, lr_0 = 8.3388e-04
Loss = 5.1168e-01, PNorm = 45.6232, GNorm = 1.0826, lr_0 = 8.3330e-04
Loss = 4.5744e-01, PNorm = 45.6433, GNorm = 1.4158, lr_0 = 8.3273e-04
Loss = 5.2301e-01, PNorm = 45.6577, GNorm = 1.1536, lr_0 = 8.3216e-04
Loss = 4.9335e-01, PNorm = 45.6679, GNorm = 1.1199, lr_0 = 8.3159e-04
Loss = 4.5357e-01, PNorm = 45.6903, GNorm = 1.5227, lr_0 = 8.3102e-04
Loss = 4.6516e-01, PNorm = 45.7030, GNorm = 1.7544, lr_0 = 8.3045e-04
Loss = 4.4239e-01, PNorm = 45.7153, GNorm = 1.1433, lr_0 = 8.2988e-04
Loss = 4.7890e-01, PNorm = 45.7325, GNorm = 2.1254, lr_0 = 8.2932e-04
Loss = 5.1571e-01, PNorm = 45.7488, GNorm = 1.3607, lr_0 = 8.2875e-04
Loss = 4.9194e-01, PNorm = 45.7689, GNorm = 1.4519, lr_0 = 8.2818e-04
Loss = 4.1548e-01, PNorm = 45.7818, GNorm = 1.0141, lr_0 = 8.2761e-04
Loss = 4.6127e-01, PNorm = 45.7946, GNorm = 1.4801, lr_0 = 8.2705e-04
Loss = 4.8753e-01, PNorm = 45.8184, GNorm = 1.3991, lr_0 = 8.2648e-04
Loss = 4.8052e-01, PNorm = 45.8416, GNorm = 1.9574, lr_0 = 8.2591e-04
Loss = 5.1303e-01, PNorm = 45.8584, GNorm = 1.4606, lr_0 = 8.2535e-04
Loss = 5.2532e-01, PNorm = 45.8795, GNorm = 1.7999, lr_0 = 8.2478e-04
Loss = 4.7261e-01, PNorm = 45.8997, GNorm = 1.4527, lr_0 = 8.2422e-04
Loss = 4.8704e-01, PNorm = 45.9185, GNorm = 1.2978, lr_0 = 8.2365e-04
Loss = 5.4282e-01, PNorm = 45.9419, GNorm = 1.4222, lr_0 = 8.2309e-04
Loss = 4.8364e-01, PNorm = 45.9633, GNorm = 1.2205, lr_0 = 8.2252e-04
Loss = 4.5780e-01, PNorm = 45.9753, GNorm = 1.0834, lr_0 = 8.2196e-04
Loss = 5.1861e-01, PNorm = 45.9877, GNorm = 2.1239, lr_0 = 8.2140e-04
Loss = 5.6941e-01, PNorm = 46.0124, GNorm = 2.1211, lr_0 = 8.2084e-04
Loss = 4.8780e-01, PNorm = 46.0345, GNorm = 1.2585, lr_0 = 8.2027e-04
Loss = 5.9738e-01, PNorm = 46.0538, GNorm = 1.5420, lr_0 = 8.1971e-04
Loss = 4.4778e-01, PNorm = 46.0734, GNorm = 1.8527, lr_0 = 8.1915e-04
Loss = 5.5864e-01, PNorm = 46.0946, GNorm = 1.3056, lr_0 = 8.1859e-04
Loss = 5.2614e-01, PNorm = 46.1163, GNorm = 2.6937, lr_0 = 8.1803e-04
Loss = 4.7517e-01, PNorm = 46.1303, GNorm = 1.5810, lr_0 = 8.1747e-04
Loss = 4.8775e-01, PNorm = 46.1457, GNorm = 2.0632, lr_0 = 8.1691e-04
Loss = 4.8430e-01, PNorm = 46.1677, GNorm = 1.3348, lr_0 = 8.1635e-04
Loss = 4.5072e-01, PNorm = 46.1815, GNorm = 1.0895, lr_0 = 8.1579e-04
Loss = 4.9852e-01, PNorm = 46.1985, GNorm = 2.1316, lr_0 = 8.1523e-04
Loss = 4.6049e-01, PNorm = 46.2176, GNorm = 1.5465, lr_0 = 8.1467e-04
Loss = 5.0492e-01, PNorm = 46.2340, GNorm = 3.1721, lr_0 = 8.1411e-04
Loss = 5.3359e-01, PNorm = 46.2504, GNorm = 2.2065, lr_0 = 8.1355e-04
Loss = 5.7047e-01, PNorm = 46.2734, GNorm = 1.1769, lr_0 = 8.1300e-04
Loss = 4.9609e-01, PNorm = 46.3077, GNorm = 1.2667, lr_0 = 8.1244e-04
Loss = 4.5187e-01, PNorm = 46.3305, GNorm = 1.0203, lr_0 = 8.1188e-04
Loss = 4.5881e-01, PNorm = 46.3441, GNorm = 0.9317, lr_0 = 8.1133e-04
Loss = 4.9539e-01, PNorm = 46.3655, GNorm = 2.0545, lr_0 = 8.1077e-04
Loss = 5.1170e-01, PNorm = 46.3801, GNorm = 1.1724, lr_0 = 8.1022e-04
Loss = 5.1288e-01, PNorm = 46.3958, GNorm = 1.6876, lr_0 = 8.0966e-04
Loss = 5.3995e-01, PNorm = 46.4148, GNorm = 1.4806, lr_0 = 8.0911e-04
Loss = 4.3407e-01, PNorm = 46.4297, GNorm = 1.3233, lr_0 = 8.0855e-04
Loss = 5.2501e-01, PNorm = 46.4389, GNorm = 1.2171, lr_0 = 8.0800e-04
Loss = 4.7563e-01, PNorm = 46.4483, GNorm = 1.4679, lr_0 = 8.0745e-04
Loss = 5.0752e-01, PNorm = 46.4619, GNorm = 3.4620, lr_0 = 8.0689e-04
Loss = 5.5201e-01, PNorm = 46.4837, GNorm = 1.8076, lr_0 = 8.0634e-04
Loss = 4.4425e-01, PNorm = 46.5078, GNorm = 1.3990, lr_0 = 8.0579e-04
Loss = 4.8044e-01, PNorm = 46.5261, GNorm = 1.9556, lr_0 = 8.0523e-04
Loss = 5.5011e-01, PNorm = 46.5450, GNorm = 1.7560, lr_0 = 8.0468e-04
Loss = 4.5422e-01, PNorm = 46.5722, GNorm = 1.1581, lr_0 = 8.0413e-04
Loss = 5.0737e-01, PNorm = 46.6028, GNorm = 1.9743, lr_0 = 8.0358e-04
Loss = 4.7448e-01, PNorm = 46.6230, GNorm = 1.2434, lr_0 = 8.0303e-04
Loss = 4.5101e-01, PNorm = 46.6395, GNorm = 1.3291, lr_0 = 8.0248e-04
Loss = 5.5752e-01, PNorm = 46.6617, GNorm = 1.9580, lr_0 = 8.0193e-04
Loss = 4.4129e-01, PNorm = 46.6810, GNorm = 1.1590, lr_0 = 8.0138e-04
Loss = 4.3777e-01, PNorm = 46.6863, GNorm = 1.5626, lr_0 = 8.0083e-04
Loss = 4.5668e-01, PNorm = 46.6981, GNorm = 0.9234, lr_0 = 8.0028e-04
Loss = 4.9623e-01, PNorm = 46.7134, GNorm = 0.7862, lr_0 = 7.9974e-04
Loss = 4.5322e-01, PNorm = 46.7239, GNorm = 1.1113, lr_0 = 7.9919e-04
Loss = 4.4992e-01, PNorm = 46.7476, GNorm = 1.1205, lr_0 = 7.9864e-04
Loss = 4.4091e-01, PNorm = 46.7619, GNorm = 0.7640, lr_0 = 7.9809e-04
Loss = 4.9175e-01, PNorm = 46.7796, GNorm = 0.7760, lr_0 = 7.9755e-04
Loss = 4.2417e-01, PNorm = 46.7960, GNorm = 1.6933, lr_0 = 7.9700e-04
Loss = 5.1400e-01, PNorm = 46.8148, GNorm = 3.0577, lr_0 = 7.9645e-04
Loss = 5.3681e-01, PNorm = 46.8445, GNorm = 1.9986, lr_0 = 7.9591e-04
Loss = 5.4135e-01, PNorm = 46.8684, GNorm = 2.5064, lr_0 = 7.9536e-04
Loss = 4.0479e-01, PNorm = 46.8888, GNorm = 1.0035, lr_0 = 7.9482e-04
Loss = 4.9332e-01, PNorm = 46.9037, GNorm = 1.4101, lr_0 = 7.9427e-04
Loss = 4.8093e-01, PNorm = 46.9165, GNorm = 1.3785, lr_0 = 7.9373e-04
Loss = 4.5034e-01, PNorm = 46.9247, GNorm = 2.4358, lr_0 = 7.9319e-04
Loss = 4.6243e-01, PNorm = 46.9366, GNorm = 1.3017, lr_0 = 7.9264e-04
Loss = 4.8704e-01, PNorm = 46.9574, GNorm = 1.9292, lr_0 = 7.9210e-04
Loss = 5.0653e-01, PNorm = 46.9770, GNorm = 2.1994, lr_0 = 7.9156e-04
Loss = 4.8067e-01, PNorm = 46.9802, GNorm = 3.0170, lr_0 = 7.9101e-04
Loss = 5.3722e-01, PNorm = 46.9954, GNorm = 0.9697, lr_0 = 7.9047e-04
Loss = 4.9953e-01, PNorm = 47.0195, GNorm = 1.2580, lr_0 = 7.8993e-04
Loss = 5.3230e-01, PNorm = 47.0327, GNorm = 0.9626, lr_0 = 7.8939e-04
Loss = 4.9273e-01, PNorm = 47.0504, GNorm = 0.9758, lr_0 = 7.8885e-04
Loss = 4.6054e-01, PNorm = 47.0601, GNorm = 1.1878, lr_0 = 7.8831e-04
Loss = 4.7175e-01, PNorm = 47.0740, GNorm = 2.2854, lr_0 = 7.8777e-04
Loss = 4.4015e-01, PNorm = 47.0939, GNorm = 1.0419, lr_0 = 7.8723e-04
Loss = 4.8084e-01, PNorm = 47.1022, GNorm = 1.1450, lr_0 = 7.8669e-04
Loss = 5.6747e-01, PNorm = 47.1159, GNorm = 0.9853, lr_0 = 7.8615e-04
Loss = 4.6445e-01, PNorm = 47.1329, GNorm = 1.0944, lr_0 = 7.8561e-04
Loss = 5.1520e-01, PNorm = 47.1539, GNorm = 1.7818, lr_0 = 7.8507e-04
Loss = 4.6574e-01, PNorm = 47.1712, GNorm = 1.5570, lr_0 = 7.8454e-04
Loss = 4.8209e-01, PNorm = 47.1932, GNorm = 3.5905, lr_0 = 7.8400e-04
Loss = 4.6139e-01, PNorm = 47.2154, GNorm = 0.7493, lr_0 = 7.8346e-04
Loss = 4.1688e-01, PNorm = 47.2307, GNorm = 1.7014, lr_0 = 7.8293e-04
Loss = 5.0826e-01, PNorm = 47.2539, GNorm = 1.8242, lr_0 = 7.8239e-04
Loss = 4.8055e-01, PNorm = 47.2712, GNorm = 0.8922, lr_0 = 7.8185e-04
Loss = 5.0357e-01, PNorm = 47.2888, GNorm = 1.2579, lr_0 = 7.8132e-04
Validation mae = 0.125576
Epoch 5
Loss = 5.3711e-01, PNorm = 47.2931, GNorm = 1.0614, lr_0 = 7.8078e-04
Loss = 5.5806e-01, PNorm = 47.3210, GNorm = 1.7699, lr_0 = 7.8025e-04
Loss = 4.7308e-01, PNorm = 47.3484, GNorm = 0.9429, lr_0 = 7.7971e-04
Loss = 4.5543e-01, PNorm = 47.3715, GNorm = 3.0426, lr_0 = 7.7918e-04
Loss = 4.6871e-01, PNorm = 47.3867, GNorm = 1.1032, lr_0 = 7.7864e-04
Loss = 5.3746e-01, PNorm = 47.4043, GNorm = 1.3851, lr_0 = 7.7811e-04
Loss = 5.0525e-01, PNorm = 47.4189, GNorm = 1.1148, lr_0 = 7.7758e-04
Loss = 4.8930e-01, PNorm = 47.4454, GNorm = 1.3670, lr_0 = 7.7705e-04
Loss = 4.4614e-01, PNorm = 47.4682, GNorm = 0.8989, lr_0 = 7.7651e-04
Loss = 4.2689e-01, PNorm = 47.4859, GNorm = 1.4493, lr_0 = 7.7598e-04
Loss = 4.4844e-01, PNorm = 47.5047, GNorm = 1.3586, lr_0 = 7.7545e-04
Loss = 4.2415e-01, PNorm = 47.5200, GNorm = 2.8331, lr_0 = 7.7492e-04
Loss = 4.1812e-01, PNorm = 47.5395, GNorm = 2.4227, lr_0 = 7.7439e-04
Loss = 5.3017e-01, PNorm = 47.5562, GNorm = 1.6646, lr_0 = 7.7386e-04
Loss = 4.5871e-01, PNorm = 47.5741, GNorm = 1.6264, lr_0 = 7.7333e-04
Loss = 3.8184e-01, PNorm = 47.5914, GNorm = 1.0642, lr_0 = 7.7280e-04
Loss = 4.5020e-01, PNorm = 47.6011, GNorm = 1.4206, lr_0 = 7.7227e-04
Loss = 4.1407e-01, PNorm = 47.6143, GNorm = 1.3350, lr_0 = 7.7174e-04
Loss = 4.5270e-01, PNorm = 47.6319, GNorm = 0.8100, lr_0 = 7.7121e-04
Loss = 4.4842e-01, PNorm = 47.6461, GNorm = 1.0391, lr_0 = 7.7068e-04
Loss = 4.5405e-01, PNorm = 47.6522, GNorm = 1.8564, lr_0 = 7.7015e-04
Loss = 4.3367e-01, PNorm = 47.6790, GNorm = 1.1904, lr_0 = 7.6963e-04
Loss = 4.5126e-01, PNorm = 47.6989, GNorm = 1.2654, lr_0 = 7.6910e-04
Loss = 4.3496e-01, PNorm = 47.7137, GNorm = 1.3401, lr_0 = 7.6857e-04
Loss = 4.5374e-01, PNorm = 47.7336, GNorm = 1.1982, lr_0 = 7.6805e-04
Loss = 4.9210e-01, PNorm = 47.7497, GNorm = 1.0592, lr_0 = 7.6752e-04
Loss = 4.5419e-01, PNorm = 47.7679, GNorm = 2.0156, lr_0 = 7.6699e-04
Loss = 4.5455e-01, PNorm = 47.7972, GNorm = 0.9520, lr_0 = 7.6647e-04
Loss = 4.6927e-01, PNorm = 47.8151, GNorm = 1.2362, lr_0 = 7.6594e-04
Loss = 5.0735e-01, PNorm = 47.8346, GNorm = 1.7049, lr_0 = 7.6542e-04
Loss = 5.0966e-01, PNorm = 47.8549, GNorm = 1.3589, lr_0 = 7.6489e-04
Loss = 4.9051e-01, PNorm = 47.8737, GNorm = 1.1624, lr_0 = 7.6437e-04
Loss = 4.5894e-01, PNorm = 47.9044, GNorm = 1.3797, lr_0 = 7.6385e-04
Loss = 3.8115e-01, PNorm = 47.9290, GNorm = 0.8447, lr_0 = 7.6332e-04
Loss = 5.2501e-01, PNorm = 47.9462, GNorm = 1.2476, lr_0 = 7.6280e-04
Loss = 4.5470e-01, PNorm = 47.9696, GNorm = 1.3387, lr_0 = 7.6228e-04
Loss = 5.2052e-01, PNorm = 47.9911, GNorm = 1.3968, lr_0 = 7.6176e-04
Loss = 4.8451e-01, PNorm = 48.0132, GNorm = 2.1266, lr_0 = 7.6123e-04
Loss = 5.8712e-01, PNorm = 48.0344, GNorm = 1.4242, lr_0 = 7.6071e-04
Loss = 5.1472e-01, PNorm = 48.0534, GNorm = 1.6985, lr_0 = 7.6019e-04
Loss = 3.9938e-01, PNorm = 48.0618, GNorm = 1.9803, lr_0 = 7.5967e-04
Loss = 3.9044e-01, PNorm = 48.0801, GNorm = 1.9185, lr_0 = 7.5915e-04
Loss = 4.2402e-01, PNorm = 48.0977, GNorm = 1.1302, lr_0 = 7.5863e-04
Loss = 4.5621e-01, PNorm = 48.1200, GNorm = 0.9899, lr_0 = 7.5811e-04
Loss = 4.4874e-01, PNorm = 48.1382, GNorm = 1.2585, lr_0 = 7.5759e-04
Loss = 4.0459e-01, PNorm = 48.1503, GNorm = 1.1448, lr_0 = 7.5707e-04
Loss = 5.8388e-01, PNorm = 48.1584, GNorm = 2.3960, lr_0 = 7.5655e-04
Loss = 4.1381e-01, PNorm = 48.1673, GNorm = 1.6189, lr_0 = 7.5603e-04
Loss = 5.0485e-01, PNorm = 48.1799, GNorm = 0.9172, lr_0 = 7.5552e-04
Loss = 5.3254e-01, PNorm = 48.2018, GNorm = 1.6733, lr_0 = 7.5500e-04
Loss = 4.8059e-01, PNorm = 48.2268, GNorm = 1.2168, lr_0 = 7.5448e-04
Loss = 4.7574e-01, PNorm = 48.2461, GNorm = 1.1601, lr_0 = 7.5397e-04
Loss = 4.8866e-01, PNorm = 48.2651, GNorm = 1.8559, lr_0 = 7.5345e-04
Loss = 4.5459e-01, PNorm = 48.2823, GNorm = 1.1428, lr_0 = 7.5293e-04
Loss = 4.9389e-01, PNorm = 48.2997, GNorm = 2.0220, lr_0 = 7.5242e-04
Loss = 4.7859e-01, PNorm = 48.3217, GNorm = 1.9589, lr_0 = 7.5190e-04
Loss = 4.3638e-01, PNorm = 48.3394, GNorm = 1.6740, lr_0 = 7.5139e-04
Loss = 4.2435e-01, PNorm = 48.3550, GNorm = 1.5666, lr_0 = 7.5087e-04
Loss = 4.9543e-01, PNorm = 48.3737, GNorm = 1.7036, lr_0 = 7.5036e-04
Loss = 4.9513e-01, PNorm = 48.3907, GNorm = 1.9639, lr_0 = 7.4984e-04
Loss = 4.7944e-01, PNorm = 48.4078, GNorm = 1.5929, lr_0 = 7.4933e-04
Loss = 4.5021e-01, PNorm = 48.4241, GNorm = 1.0014, lr_0 = 7.4882e-04
Loss = 5.0954e-01, PNorm = 48.4451, GNorm = 1.7380, lr_0 = 7.4830e-04
Loss = 4.3262e-01, PNorm = 48.4583, GNorm = 1.8380, lr_0 = 7.4779e-04
Loss = 4.6575e-01, PNorm = 48.4748, GNorm = 1.1271, lr_0 = 7.4728e-04
Loss = 5.2931e-01, PNorm = 48.4952, GNorm = 2.3885, lr_0 = 7.4677e-04
Loss = 4.5726e-01, PNorm = 48.5147, GNorm = 1.6731, lr_0 = 7.4625e-04
Loss = 4.7500e-01, PNorm = 48.5303, GNorm = 0.9329, lr_0 = 7.4574e-04
Loss = 4.1496e-01, PNorm = 48.5439, GNorm = 0.7627, lr_0 = 7.4523e-04
Loss = 5.1735e-01, PNorm = 48.5560, GNorm = 2.4338, lr_0 = 7.4472e-04
Loss = 4.4818e-01, PNorm = 48.5705, GNorm = 1.0749, lr_0 = 7.4421e-04
Loss = 5.0915e-01, PNorm = 48.5885, GNorm = 2.1777, lr_0 = 7.4370e-04
Loss = 4.5207e-01, PNorm = 48.6026, GNorm = 1.7326, lr_0 = 7.4319e-04
Loss = 4.8839e-01, PNorm = 48.6169, GNorm = 1.8137, lr_0 = 7.4268e-04
Loss = 4.8941e-01, PNorm = 48.6273, GNorm = 1.5501, lr_0 = 7.4217e-04
Loss = 4.5286e-01, PNorm = 48.6389, GNorm = 1.5003, lr_0 = 7.4167e-04
Loss = 4.4512e-01, PNorm = 48.6476, GNorm = 1.2849, lr_0 = 7.4116e-04
Loss = 4.4312e-01, PNorm = 48.6588, GNorm = 1.4122, lr_0 = 7.4065e-04
Loss = 5.2733e-01, PNorm = 48.6702, GNorm = 1.0111, lr_0 = 7.4014e-04
Loss = 4.9313e-01, PNorm = 48.6868, GNorm = 0.9422, lr_0 = 7.3964e-04
Loss = 4.9873e-01, PNorm = 48.7112, GNorm = 2.2470, lr_0 = 7.3913e-04
Loss = 4.8306e-01, PNorm = 48.7311, GNorm = 1.9619, lr_0 = 7.3862e-04
Loss = 4.5859e-01, PNorm = 48.7481, GNorm = 1.2269, lr_0 = 7.3812e-04
Loss = 4.8364e-01, PNorm = 48.7655, GNorm = 1.6577, lr_0 = 7.3761e-04
Loss = 5.0798e-01, PNorm = 48.7822, GNorm = 0.9668, lr_0 = 7.3711e-04
Loss = 4.2832e-01, PNorm = 48.7971, GNorm = 0.8844, lr_0 = 7.3660e-04
Loss = 4.7879e-01, PNorm = 48.8122, GNorm = 1.9187, lr_0 = 7.3610e-04
Loss = 4.5788e-01, PNorm = 48.8275, GNorm = 1.4904, lr_0 = 7.3559e-04
Loss = 4.9357e-01, PNorm = 48.8456, GNorm = 1.6197, lr_0 = 7.3509e-04
Loss = 4.6227e-01, PNorm = 48.8671, GNorm = 2.2565, lr_0 = 7.3458e-04
Loss = 4.3808e-01, PNorm = 48.8830, GNorm = 1.8124, lr_0 = 7.3408e-04
Loss = 4.5190e-01, PNorm = 48.8890, GNorm = 1.2843, lr_0 = 7.3358e-04
Loss = 4.7053e-01, PNorm = 48.9046, GNorm = 1.5737, lr_0 = 7.3308e-04
Loss = 5.0851e-01, PNorm = 48.9195, GNorm = 1.0143, lr_0 = 7.3257e-04
Loss = 5.3519e-01, PNorm = 48.9315, GNorm = 1.8514, lr_0 = 7.3207e-04
Loss = 4.8092e-01, PNorm = 48.9474, GNorm = 1.5973, lr_0 = 7.3157e-04
Loss = 4.8585e-01, PNorm = 48.9672, GNorm = 1.0830, lr_0 = 7.3107e-04
Loss = 4.5827e-01, PNorm = 48.9845, GNorm = 1.0582, lr_0 = 7.3057e-04
Loss = 4.5610e-01, PNorm = 49.0001, GNorm = 2.0343, lr_0 = 7.3007e-04
Loss = 4.5948e-01, PNorm = 49.0092, GNorm = 1.8705, lr_0 = 7.2957e-04
Loss = 4.5935e-01, PNorm = 49.0275, GNorm = 1.7967, lr_0 = 7.2907e-04
Loss = 4.5977e-01, PNorm = 49.0425, GNorm = 1.5383, lr_0 = 7.2857e-04
Loss = 5.6623e-01, PNorm = 49.0627, GNorm = 1.9806, lr_0 = 7.2807e-04
Loss = 4.9094e-01, PNorm = 49.0800, GNorm = 1.0480, lr_0 = 7.2757e-04
Loss = 5.3281e-01, PNorm = 49.0959, GNorm = 1.4073, lr_0 = 7.2707e-04
Loss = 5.0180e-01, PNorm = 49.1113, GNorm = 2.1123, lr_0 = 7.2657e-04
Loss = 4.9215e-01, PNorm = 49.1247, GNorm = 3.3229, lr_0 = 7.2608e-04
Loss = 4.4867e-01, PNorm = 49.1361, GNorm = 1.2738, lr_0 = 7.2558e-04
Loss = 4.6534e-01, PNorm = 49.1562, GNorm = 1.2882, lr_0 = 7.2508e-04
Loss = 4.3561e-01, PNorm = 49.1679, GNorm = 3.6103, lr_0 = 7.2458e-04
Loss = 4.7169e-01, PNorm = 49.1882, GNorm = 0.7422, lr_0 = 7.2409e-04
Loss = 4.5226e-01, PNorm = 49.2083, GNorm = 1.3586, lr_0 = 7.2359e-04
Loss = 4.5801e-01, PNorm = 49.2257, GNorm = 1.6263, lr_0 = 7.2310e-04
Loss = 4.7788e-01, PNorm = 49.2405, GNorm = 1.7096, lr_0 = 7.2260e-04
Loss = 4.6784e-01, PNorm = 49.2541, GNorm = 1.0985, lr_0 = 7.2211e-04
Loss = 4.8147e-01, PNorm = 49.2621, GNorm = 1.2172, lr_0 = 7.2161e-04
Loss = 4.6624e-01, PNorm = 49.2818, GNorm = 1.3268, lr_0 = 7.2112e-04
Loss = 4.3604e-01, PNorm = 49.3004, GNorm = 0.9619, lr_0 = 7.2062e-04
Loss = 4.5242e-01, PNorm = 49.3176, GNorm = 2.5319, lr_0 = 7.2013e-04
Loss = 4.1621e-01, PNorm = 49.3336, GNorm = 1.1703, lr_0 = 7.1964e-04
Validation mae = 0.123506
Epoch 6
Loss = 4.6584e-01, PNorm = 49.3518, GNorm = 1.1086, lr_0 = 7.1914e-04
Loss = 4.8762e-01, PNorm = 49.3755, GNorm = 1.2517, lr_0 = 7.1865e-04
Loss = 4.4610e-01, PNorm = 49.3958, GNorm = 1.0092, lr_0 = 7.1816e-04
Loss = 4.8436e-01, PNorm = 49.4170, GNorm = 1.8619, lr_0 = 7.1767e-04
Loss = 4.3862e-01, PNorm = 49.4429, GNorm = 2.4490, lr_0 = 7.1717e-04
Loss = 4.3822e-01, PNorm = 49.4580, GNorm = 1.8493, lr_0 = 7.1668e-04
Loss = 4.8101e-01, PNorm = 49.4750, GNorm = 1.3885, lr_0 = 7.1619e-04
Loss = 4.4225e-01, PNorm = 49.4884, GNorm = 1.5913, lr_0 = 7.1570e-04
Loss = 4.4971e-01, PNorm = 49.5076, GNorm = 1.6631, lr_0 = 7.1521e-04
Loss = 5.1735e-01, PNorm = 49.5232, GNorm = 2.3249, lr_0 = 7.1472e-04
Loss = 4.2425e-01, PNorm = 49.5377, GNorm = 1.0081, lr_0 = 7.1423e-04
Loss = 4.3854e-01, PNorm = 49.5558, GNorm = 1.4674, lr_0 = 7.1374e-04
Loss = 4.8494e-01, PNorm = 49.5664, GNorm = 1.2525, lr_0 = 7.1325e-04
Loss = 5.0101e-01, PNorm = 49.5759, GNorm = 1.7422, lr_0 = 7.1277e-04
Loss = 4.9280e-01, PNorm = 49.5886, GNorm = 1.0587, lr_0 = 7.1228e-04
Loss = 4.8262e-01, PNorm = 49.6068, GNorm = 1.2850, lr_0 = 7.1179e-04
Loss = 4.3681e-01, PNorm = 49.6234, GNorm = 1.3370, lr_0 = 7.1130e-04
Loss = 4.7479e-01, PNorm = 49.6410, GNorm = 1.4552, lr_0 = 7.1081e-04
Loss = 4.5057e-01, PNorm = 49.6603, GNorm = 1.0788, lr_0 = 7.1033e-04
Loss = 5.2397e-01, PNorm = 49.6793, GNorm = 1.5496, lr_0 = 7.0984e-04
Loss = 4.1326e-01, PNorm = 49.6902, GNorm = 1.3890, lr_0 = 7.0935e-04
Loss = 4.1788e-01, PNorm = 49.7058, GNorm = 1.1474, lr_0 = 7.0887e-04
Loss = 4.4091e-01, PNorm = 49.7194, GNorm = 1.2269, lr_0 = 7.0838e-04
Loss = 4.5101e-01, PNorm = 49.7349, GNorm = 1.2562, lr_0 = 7.0790e-04
Loss = 5.0032e-01, PNorm = 49.7470, GNorm = 1.6360, lr_0 = 7.0741e-04
Loss = 4.9113e-01, PNorm = 49.7624, GNorm = 1.3238, lr_0 = 7.0693e-04
Loss = 5.1323e-01, PNorm = 49.7751, GNorm = 1.5261, lr_0 = 7.0644e-04
Loss = 4.7526e-01, PNorm = 49.7872, GNorm = 0.9434, lr_0 = 7.0596e-04
Loss = 4.2308e-01, PNorm = 49.8051, GNorm = 2.1511, lr_0 = 7.0548e-04
Loss = 4.6000e-01, PNorm = 49.8228, GNorm = 1.2680, lr_0 = 7.0499e-04
Loss = 5.0971e-01, PNorm = 49.8473, GNorm = 1.7552, lr_0 = 7.0451e-04
Loss = 4.9383e-01, PNorm = 49.8693, GNorm = 1.4247, lr_0 = 7.0403e-04
Loss = 4.6676e-01, PNorm = 49.8826, GNorm = 1.6687, lr_0 = 7.0354e-04
Loss = 4.4588e-01, PNorm = 49.9014, GNorm = 1.0522, lr_0 = 7.0306e-04
Loss = 4.9922e-01, PNorm = 49.9186, GNorm = 1.2476, lr_0 = 7.0258e-04
Loss = 4.5608e-01, PNorm = 49.9309, GNorm = 1.3711, lr_0 = 7.0210e-04
Loss = 4.2398e-01, PNorm = 49.9467, GNorm = 2.2298, lr_0 = 7.0162e-04
Loss = 4.3771e-01, PNorm = 49.9645, GNorm = 1.0837, lr_0 = 7.0114e-04
Loss = 4.8970e-01, PNorm = 49.9861, GNorm = 0.9938, lr_0 = 7.0066e-04
Loss = 4.0694e-01, PNorm = 50.0046, GNorm = 1.5879, lr_0 = 7.0018e-04
Loss = 4.3112e-01, PNorm = 50.0172, GNorm = 1.2555, lr_0 = 6.9970e-04
Loss = 5.1935e-01, PNorm = 50.0300, GNorm = 1.2813, lr_0 = 6.9922e-04
Loss = 4.5070e-01, PNorm = 50.0416, GNorm = 1.6401, lr_0 = 6.9874e-04
Loss = 4.7355e-01, PNorm = 50.0616, GNorm = 1.7732, lr_0 = 6.9826e-04
Loss = 4.4175e-01, PNorm = 50.0811, GNorm = 1.1969, lr_0 = 6.9778e-04
Loss = 4.4845e-01, PNorm = 50.0940, GNorm = 1.4072, lr_0 = 6.9730e-04
Loss = 4.6061e-01, PNorm = 50.1172, GNorm = 1.6686, lr_0 = 6.9683e-04
Loss = 4.2873e-01, PNorm = 50.1401, GNorm = 2.0935, lr_0 = 6.9635e-04
Loss = 4.5854e-01, PNorm = 50.1568, GNorm = 1.0747, lr_0 = 6.9587e-04
Loss = 4.2412e-01, PNorm = 50.1748, GNorm = 1.6673, lr_0 = 6.9540e-04
Loss = 3.5751e-01, PNorm = 50.1917, GNorm = 1.3184, lr_0 = 6.9492e-04
Loss = 4.8909e-01, PNorm = 50.2086, GNorm = 1.0421, lr_0 = 6.9444e-04
Loss = 5.2853e-01, PNorm = 50.2204, GNorm = 1.2596, lr_0 = 6.9397e-04
Loss = 4.9856e-01, PNorm = 50.2377, GNorm = 1.2644, lr_0 = 6.9349e-04
Loss = 3.9555e-01, PNorm = 50.2469, GNorm = 0.8953, lr_0 = 6.9302e-04
Loss = 4.4908e-01, PNorm = 50.2638, GNorm = 1.5550, lr_0 = 6.9254e-04
Loss = 4.7654e-01, PNorm = 50.2820, GNorm = 1.0598, lr_0 = 6.9207e-04
Loss = 4.2672e-01, PNorm = 50.2927, GNorm = 1.4473, lr_0 = 6.9159e-04
Loss = 4.0337e-01, PNorm = 50.3075, GNorm = 2.4732, lr_0 = 6.9112e-04
Loss = 4.3749e-01, PNorm = 50.3246, GNorm = 1.1409, lr_0 = 6.9065e-04
Loss = 5.2832e-01, PNorm = 50.3433, GNorm = 1.5779, lr_0 = 6.9017e-04
Loss = 4.3231e-01, PNorm = 50.3629, GNorm = 1.2045, lr_0 = 6.8970e-04
Loss = 4.2050e-01, PNorm = 50.3806, GNorm = 1.1368, lr_0 = 6.8923e-04
Loss = 4.6907e-01, PNorm = 50.3921, GNorm = 1.6682, lr_0 = 6.8876e-04
Loss = 4.6835e-01, PNorm = 50.4078, GNorm = 2.7410, lr_0 = 6.8828e-04
Loss = 4.5988e-01, PNorm = 50.4201, GNorm = 1.0973, lr_0 = 6.8781e-04
Loss = 4.5111e-01, PNorm = 50.4405, GNorm = 1.2730, lr_0 = 6.8734e-04
Loss = 4.5618e-01, PNorm = 50.4523, GNorm = 1.6171, lr_0 = 6.8687e-04
Loss = 4.6367e-01, PNorm = 50.4631, GNorm = 1.0851, lr_0 = 6.8640e-04
Loss = 4.9624e-01, PNorm = 50.4762, GNorm = 1.3156, lr_0 = 6.8593e-04
Loss = 4.1728e-01, PNorm = 50.4834, GNorm = 1.8389, lr_0 = 6.8546e-04
Loss = 5.3459e-01, PNorm = 50.4957, GNorm = 1.4962, lr_0 = 6.8499e-04
Loss = 3.8334e-01, PNorm = 50.5097, GNorm = 1.5208, lr_0 = 6.8452e-04
Loss = 5.1939e-01, PNorm = 50.5210, GNorm = 1.4302, lr_0 = 6.8405e-04
Loss = 4.9144e-01, PNorm = 50.5315, GNorm = 2.2144, lr_0 = 6.8358e-04
Loss = 4.5794e-01, PNorm = 50.5480, GNorm = 1.3747, lr_0 = 6.8312e-04
Loss = 4.0897e-01, PNorm = 50.5652, GNorm = 1.0746, lr_0 = 6.8265e-04
Loss = 4.4216e-01, PNorm = 50.5794, GNorm = 2.0579, lr_0 = 6.8218e-04
Loss = 4.7584e-01, PNorm = 50.5887, GNorm = 1.5399, lr_0 = 6.8171e-04
Loss = 4.2906e-01, PNorm = 50.6021, GNorm = 1.3641, lr_0 = 6.8125e-04
Loss = 4.5894e-01, PNorm = 50.6218, GNorm = 1.4870, lr_0 = 6.8078e-04
Loss = 4.5720e-01, PNorm = 50.6353, GNorm = 1.6861, lr_0 = 6.8031e-04
Loss = 3.8553e-01, PNorm = 50.6511, GNorm = 1.0936, lr_0 = 6.7985e-04
Loss = 4.1777e-01, PNorm = 50.6632, GNorm = 1.3711, lr_0 = 6.7938e-04
Loss = 4.8208e-01, PNorm = 50.6787, GNorm = 1.1871, lr_0 = 6.7892e-04
Loss = 4.2274e-01, PNorm = 50.6987, GNorm = 2.0874, lr_0 = 6.7845e-04
Loss = 4.0643e-01, PNorm = 50.7149, GNorm = 0.9512, lr_0 = 6.7799e-04
Loss = 4.1527e-01, PNorm = 50.7256, GNorm = 1.3615, lr_0 = 6.7752e-04
Loss = 4.1925e-01, PNorm = 50.7289, GNorm = 1.3766, lr_0 = 6.7706e-04
Loss = 4.1796e-01, PNorm = 50.7395, GNorm = 1.3803, lr_0 = 6.7659e-04
Loss = 4.4662e-01, PNorm = 50.7508, GNorm = 1.5345, lr_0 = 6.7613e-04
Loss = 4.3099e-01, PNorm = 50.7613, GNorm = 1.6148, lr_0 = 6.7567e-04
Loss = 4.5026e-01, PNorm = 50.7699, GNorm = 1.5520, lr_0 = 6.7520e-04
Loss = 5.0252e-01, PNorm = 50.7832, GNorm = 1.2313, lr_0 = 6.7474e-04
Loss = 4.7945e-01, PNorm = 50.7948, GNorm = 1.9009, lr_0 = 6.7428e-04
Loss = 4.5844e-01, PNorm = 50.8087, GNorm = 1.3207, lr_0 = 6.7382e-04
Loss = 5.0464e-01, PNorm = 50.8162, GNorm = 1.0758, lr_0 = 6.7335e-04
Loss = 5.1287e-01, PNorm = 50.8284, GNorm = 1.2933, lr_0 = 6.7289e-04
Loss = 4.5505e-01, PNorm = 50.8482, GNorm = 1.0630, lr_0 = 6.7243e-04
Loss = 4.5265e-01, PNorm = 50.8644, GNorm = 1.1392, lr_0 = 6.7197e-04
Loss = 5.5170e-01, PNorm = 50.8697, GNorm = 1.2424, lr_0 = 6.7151e-04
Loss = 4.3079e-01, PNorm = 50.8911, GNorm = 1.6748, lr_0 = 6.7105e-04
Loss = 5.3594e-01, PNorm = 50.9087, GNorm = 2.2618, lr_0 = 6.7059e-04
Loss = 5.1155e-01, PNorm = 50.9281, GNorm = 1.9402, lr_0 = 6.7013e-04
Loss = 4.7286e-01, PNorm = 50.9493, GNorm = 1.4052, lr_0 = 6.6967e-04
Loss = 4.8838e-01, PNorm = 50.9637, GNorm = 1.1664, lr_0 = 6.6921e-04
Loss = 4.4433e-01, PNorm = 50.9729, GNorm = 1.4444, lr_0 = 6.6876e-04
Loss = 4.3093e-01, PNorm = 50.9780, GNorm = 1.3104, lr_0 = 6.6830e-04
Loss = 4.1095e-01, PNorm = 50.9911, GNorm = 1.7111, lr_0 = 6.6784e-04
Loss = 4.0743e-01, PNorm = 51.0084, GNorm = 1.1178, lr_0 = 6.6738e-04
Loss = 5.6660e-01, PNorm = 51.0168, GNorm = 1.3133, lr_0 = 6.6693e-04
Loss = 3.9881e-01, PNorm = 51.0323, GNorm = 2.6573, lr_0 = 6.6647e-04
Loss = 4.3479e-01, PNorm = 51.0445, GNorm = 1.2954, lr_0 = 6.6601e-04
Loss = 4.4484e-01, PNorm = 51.0588, GNorm = 1.1847, lr_0 = 6.6556e-04
Loss = 4.7823e-01, PNorm = 51.0711, GNorm = 1.2302, lr_0 = 6.6510e-04
Loss = 4.3133e-01, PNorm = 51.0852, GNorm = 1.3671, lr_0 = 6.6464e-04
Loss = 4.6394e-01, PNorm = 51.1126, GNorm = 2.2982, lr_0 = 6.6419e-04
Loss = 4.5183e-01, PNorm = 51.1337, GNorm = 1.2432, lr_0 = 6.6373e-04
Loss = 4.6676e-01, PNorm = 51.1502, GNorm = 1.5349, lr_0 = 6.6328e-04
Loss = 5.7693e-01, PNorm = 51.1674, GNorm = 1.3645, lr_0 = 6.6282e-04
Validation mae = 0.118753
Epoch 7
Loss = 4.4227e-01, PNorm = 51.1720, GNorm = 1.6492, lr_0 = 6.6237e-04
Loss = 4.5382e-01, PNorm = 51.1791, GNorm = 1.1115, lr_0 = 6.6192e-04
Loss = 4.2542e-01, PNorm = 51.1966, GNorm = 1.3974, lr_0 = 6.6146e-04
Loss = 5.1787e-01, PNorm = 51.2134, GNorm = 1.3219, lr_0 = 6.6101e-04
Loss = 4.7495e-01, PNorm = 51.2285, GNorm = 1.7721, lr_0 = 6.6056e-04
Loss = 3.5358e-01, PNorm = 51.2451, GNorm = 0.8578, lr_0 = 6.6011e-04
Loss = 4.2906e-01, PNorm = 51.2608, GNorm = 0.9624, lr_0 = 6.5965e-04
Loss = 3.7590e-01, PNorm = 51.2736, GNorm = 1.3156, lr_0 = 6.5920e-04
Loss = 5.0841e-01, PNorm = 51.2848, GNorm = 1.7119, lr_0 = 6.5875e-04
Loss = 3.9307e-01, PNorm = 51.2973, GNorm = 1.3552, lr_0 = 6.5830e-04
Loss = 5.4380e-01, PNorm = 51.3180, GNorm = 3.0399, lr_0 = 6.5785e-04
Loss = 4.4237e-01, PNorm = 51.3451, GNorm = 1.1721, lr_0 = 6.5740e-04
Loss = 4.7408e-01, PNorm = 51.3658, GNorm = 1.4209, lr_0 = 6.5695e-04
Loss = 4.4970e-01, PNorm = 51.3796, GNorm = 1.2592, lr_0 = 6.5650e-04
Loss = 4.6412e-01, PNorm = 51.3955, GNorm = 2.1788, lr_0 = 6.5605e-04
Loss = 4.2853e-01, PNorm = 51.4094, GNorm = 1.5109, lr_0 = 6.5560e-04
Loss = 4.7090e-01, PNorm = 51.4249, GNorm = 1.4060, lr_0 = 6.5515e-04
Loss = 4.3083e-01, PNorm = 51.4339, GNorm = 1.6896, lr_0 = 6.5470e-04
Loss = 4.3264e-01, PNorm = 51.4415, GNorm = 1.2248, lr_0 = 6.5425e-04
Loss = 4.0880e-01, PNorm = 51.4480, GNorm = 1.1082, lr_0 = 6.5380e-04
Loss = 4.4976e-01, PNorm = 51.4531, GNorm = 1.8988, lr_0 = 6.5335e-04
Loss = 3.9472e-01, PNorm = 51.4670, GNorm = 1.6295, lr_0 = 6.5291e-04
Loss = 4.4706e-01, PNorm = 51.4855, GNorm = 2.6159, lr_0 = 6.5246e-04
Loss = 4.3767e-01, PNorm = 51.4915, GNorm = 2.1258, lr_0 = 6.5201e-04
Loss = 4.6583e-01, PNorm = 51.5102, GNorm = 1.1869, lr_0 = 6.5157e-04
Loss = 4.2917e-01, PNorm = 51.5347, GNorm = 1.9975, lr_0 = 6.5112e-04
Loss = 4.7127e-01, PNorm = 51.5436, GNorm = 1.4254, lr_0 = 6.5067e-04
Loss = 3.8287e-01, PNorm = 51.5543, GNorm = 1.3413, lr_0 = 6.5023e-04
Loss = 4.5426e-01, PNorm = 51.5701, GNorm = 1.2900, lr_0 = 6.4978e-04
Loss = 4.0689e-01, PNorm = 51.5900, GNorm = 0.9602, lr_0 = 6.4934e-04
Loss = 4.9612e-01, PNorm = 51.6081, GNorm = 2.1624, lr_0 = 6.4889e-04
Loss = 4.4727e-01, PNorm = 51.6198, GNorm = 1.1571, lr_0 = 6.4845e-04
Loss = 4.2177e-01, PNorm = 51.6305, GNorm = 1.9824, lr_0 = 6.4800e-04
Loss = 4.2145e-01, PNorm = 51.6446, GNorm = 1.1577, lr_0 = 6.4756e-04
Loss = 4.8715e-01, PNorm = 51.6565, GNorm = 1.1638, lr_0 = 6.4712e-04
Loss = 4.2971e-01, PNorm = 51.6663, GNorm = 1.0635, lr_0 = 6.4667e-04
Loss = 4.3167e-01, PNorm = 51.6760, GNorm = 1.1207, lr_0 = 6.4623e-04
Loss = 4.1093e-01, PNorm = 51.6777, GNorm = 1.4754, lr_0 = 6.4579e-04
Loss = 4.5250e-01, PNorm = 51.6828, GNorm = 2.0483, lr_0 = 6.4534e-04
Loss = 4.4330e-01, PNorm = 51.6940, GNorm = 1.0181, lr_0 = 6.4490e-04
Loss = 4.2550e-01, PNorm = 51.7059, GNorm = 1.3544, lr_0 = 6.4446e-04
Loss = 4.0670e-01, PNorm = 51.7145, GNorm = 1.3221, lr_0 = 6.4402e-04
Loss = 4.2955e-01, PNorm = 51.7314, GNorm = 1.3296, lr_0 = 6.4358e-04
Loss = 4.3634e-01, PNorm = 51.7449, GNorm = 1.3156, lr_0 = 6.4314e-04
Loss = 4.1327e-01, PNorm = 51.7516, GNorm = 1.3197, lr_0 = 6.4270e-04
Loss = 4.8422e-01, PNorm = 51.7674, GNorm = 1.6955, lr_0 = 6.4226e-04
Loss = 4.9019e-01, PNorm = 51.7721, GNorm = 1.4788, lr_0 = 6.4182e-04
Loss = 4.2341e-01, PNorm = 51.7806, GNorm = 0.8973, lr_0 = 6.4138e-04
Loss = 4.7346e-01, PNorm = 51.7971, GNorm = 1.9641, lr_0 = 6.4094e-04
Loss = 4.1486e-01, PNorm = 51.8087, GNorm = 1.6302, lr_0 = 6.4050e-04
Loss = 3.9345e-01, PNorm = 51.8221, GNorm = 1.3853, lr_0 = 6.4006e-04
Loss = 4.1566e-01, PNorm = 51.8430, GNorm = 1.3651, lr_0 = 6.3962e-04
Loss = 5.0997e-01, PNorm = 51.8609, GNorm = 1.6320, lr_0 = 6.3918e-04
Loss = 4.6437e-01, PNorm = 51.8812, GNorm = 1.9917, lr_0 = 6.3874e-04
Loss = 4.6201e-01, PNorm = 51.8989, GNorm = 1.3601, lr_0 = 6.3831e-04
Loss = 4.3699e-01, PNorm = 51.9061, GNorm = 1.0929, lr_0 = 6.3787e-04
Loss = 4.2562e-01, PNorm = 51.9149, GNorm = 1.1931, lr_0 = 6.3743e-04
Loss = 4.5996e-01, PNorm = 51.9329, GNorm = 1.3309, lr_0 = 6.3700e-04
Loss = 4.3081e-01, PNorm = 51.9420, GNorm = 1.4554, lr_0 = 6.3656e-04
Loss = 4.2331e-01, PNorm = 51.9509, GNorm = 1.4750, lr_0 = 6.3612e-04
Loss = 3.9291e-01, PNorm = 51.9563, GNorm = 1.2477, lr_0 = 6.3569e-04
Loss = 4.6960e-01, PNorm = 51.9667, GNorm = 1.3508, lr_0 = 6.3525e-04
Loss = 4.1391e-01, PNorm = 51.9749, GNorm = 1.2873, lr_0 = 6.3482e-04
Loss = 4.9764e-01, PNorm = 51.9933, GNorm = 0.9914, lr_0 = 6.3438e-04
Loss = 4.2608e-01, PNorm = 52.0121, GNorm = 1.2513, lr_0 = 6.3395e-04
Loss = 4.2481e-01, PNorm = 52.0212, GNorm = 1.3232, lr_0 = 6.3351e-04
Loss = 4.7817e-01, PNorm = 52.0386, GNorm = 1.2631, lr_0 = 6.3308e-04
Loss = 4.3341e-01, PNorm = 52.0531, GNorm = 1.6013, lr_0 = 6.3265e-04
Loss = 4.5569e-01, PNorm = 52.0596, GNorm = 1.0584, lr_0 = 6.3221e-04
Loss = 4.0095e-01, PNorm = 52.0764, GNorm = 1.7736, lr_0 = 6.3178e-04
Loss = 3.5589e-01, PNorm = 52.0869, GNorm = 1.0019, lr_0 = 6.3135e-04
Loss = 4.3784e-01, PNorm = 52.1038, GNorm = 1.3007, lr_0 = 6.3091e-04
Loss = 4.5696e-01, PNorm = 52.1296, GNorm = 1.9351, lr_0 = 6.3048e-04
Loss = 3.8943e-01, PNorm = 52.1490, GNorm = 1.9322, lr_0 = 6.3005e-04
Loss = 3.9290e-01, PNorm = 52.1664, GNorm = 1.3357, lr_0 = 6.2962e-04
Loss = 4.1544e-01, PNorm = 52.1759, GNorm = 1.1654, lr_0 = 6.2919e-04
Loss = 4.7472e-01, PNorm = 52.1944, GNorm = 2.0886, lr_0 = 6.2876e-04
Loss = 3.8747e-01, PNorm = 52.2109, GNorm = 1.2153, lr_0 = 6.2833e-04
Loss = 4.1201e-01, PNorm = 52.2221, GNorm = 1.7774, lr_0 = 6.2789e-04
Loss = 4.9099e-01, PNorm = 52.2275, GNorm = 1.8888, lr_0 = 6.2746e-04
Loss = 4.5225e-01, PNorm = 52.2407, GNorm = 1.5864, lr_0 = 6.2703e-04
Loss = 4.5551e-01, PNorm = 52.2511, GNorm = 1.1222, lr_0 = 6.2661e-04
Loss = 4.0484e-01, PNorm = 52.2593, GNorm = 1.1209, lr_0 = 6.2618e-04
Loss = 5.1803e-01, PNorm = 52.2731, GNorm = 1.7155, lr_0 = 6.2575e-04
Loss = 4.2744e-01, PNorm = 52.2859, GNorm = 1.5945, lr_0 = 6.2532e-04
Loss = 4.1762e-01, PNorm = 52.2947, GNorm = 0.8868, lr_0 = 6.2489e-04
Loss = 4.7353e-01, PNorm = 52.3045, GNorm = 0.8657, lr_0 = 6.2446e-04
Loss = 4.6086e-01, PNorm = 52.3182, GNorm = 0.9255, lr_0 = 6.2403e-04
Loss = 4.2624e-01, PNorm = 52.3338, GNorm = 1.4564, lr_0 = 6.2361e-04
Loss = 4.5554e-01, PNorm = 52.3482, GNorm = 2.1153, lr_0 = 6.2318e-04
Loss = 4.7407e-01, PNorm = 52.3644, GNorm = 1.2684, lr_0 = 6.2275e-04
Loss = 4.3193e-01, PNorm = 52.3811, GNorm = 1.1733, lr_0 = 6.2233e-04
Loss = 4.3330e-01, PNorm = 52.3880, GNorm = 1.5280, lr_0 = 6.2190e-04
Loss = 4.8292e-01, PNorm = 52.3979, GNorm = 0.9351, lr_0 = 6.2147e-04
Loss = 4.8188e-01, PNorm = 52.4135, GNorm = 1.1722, lr_0 = 6.2105e-04
Loss = 4.6048e-01, PNorm = 52.4278, GNorm = 1.4243, lr_0 = 6.2062e-04
Loss = 5.0294e-01, PNorm = 52.4372, GNorm = 1.4189, lr_0 = 6.2020e-04
Loss = 4.7822e-01, PNorm = 52.4478, GNorm = 0.6853, lr_0 = 6.1977e-04
Loss = 4.7696e-01, PNorm = 52.4581, GNorm = 1.2447, lr_0 = 6.1935e-04
Loss = 4.1114e-01, PNorm = 52.4690, GNorm = 1.0498, lr_0 = 6.1892e-04
Loss = 4.1798e-01, PNorm = 52.4873, GNorm = 1.2924, lr_0 = 6.1850e-04
Loss = 4.4456e-01, PNorm = 52.5007, GNorm = 1.2804, lr_0 = 6.1808e-04
Loss = 4.8599e-01, PNorm = 52.5160, GNorm = 2.8396, lr_0 = 6.1765e-04
Loss = 4.4811e-01, PNorm = 52.5288, GNorm = 1.4968, lr_0 = 6.1723e-04
Loss = 4.6559e-01, PNorm = 52.5396, GNorm = 1.4467, lr_0 = 6.1681e-04
Loss = 4.5043e-01, PNorm = 52.5516, GNorm = 1.2295, lr_0 = 6.1638e-04
Loss = 4.7641e-01, PNorm = 52.5651, GNorm = 1.4149, lr_0 = 6.1596e-04
Loss = 5.2619e-01, PNorm = 52.5786, GNorm = 2.2355, lr_0 = 6.1554e-04
Loss = 4.5654e-01, PNorm = 52.5948, GNorm = 3.0104, lr_0 = 6.1512e-04
Loss = 4.9568e-01, PNorm = 52.6130, GNorm = 2.0900, lr_0 = 6.1470e-04
Loss = 4.6321e-01, PNorm = 52.6252, GNorm = 1.4072, lr_0 = 6.1428e-04
Loss = 4.0495e-01, PNorm = 52.6387, GNorm = 1.2977, lr_0 = 6.1385e-04
Loss = 4.4484e-01, PNorm = 52.6478, GNorm = 1.1530, lr_0 = 6.1343e-04
Loss = 4.2757e-01, PNorm = 52.6615, GNorm = 1.2226, lr_0 = 6.1301e-04
Loss = 4.4281e-01, PNorm = 52.6731, GNorm = 1.3515, lr_0 = 6.1259e-04
Loss = 5.3418e-01, PNorm = 52.6851, GNorm = 1.4724, lr_0 = 6.1217e-04
Loss = 3.8404e-01, PNorm = 52.7055, GNorm = 1.5299, lr_0 = 6.1175e-04
Loss = 4.5669e-01, PNorm = 52.7157, GNorm = 1.8646, lr_0 = 6.1134e-04
Loss = 4.3186e-01, PNorm = 52.7216, GNorm = 1.5498, lr_0 = 6.1092e-04
Loss = 4.3886e-01, PNorm = 52.7326, GNorm = 2.0654, lr_0 = 6.1050e-04
Validation mae = 0.117021
Epoch 8
Loss = 4.9518e-01, PNorm = 52.7454, GNorm = 1.2940, lr_0 = 6.1008e-04
Loss = 4.9755e-01, PNorm = 52.7546, GNorm = 2.8830, lr_0 = 6.0966e-04
Loss = 4.3832e-01, PNorm = 52.7673, GNorm = 1.0778, lr_0 = 6.0924e-04
Loss = 4.9420e-01, PNorm = 52.7744, GNorm = 1.2113, lr_0 = 6.0883e-04
Loss = 3.5673e-01, PNorm = 52.7887, GNorm = 1.4183, lr_0 = 6.0841e-04
Loss = 3.9926e-01, PNorm = 52.8027, GNorm = 2.0506, lr_0 = 6.0799e-04
Loss = 4.8216e-01, PNorm = 52.8153, GNorm = 1.1192, lr_0 = 6.0758e-04
Loss = 4.8182e-01, PNorm = 52.8277, GNorm = 1.4365, lr_0 = 6.0716e-04
Loss = 4.4391e-01, PNorm = 52.8388, GNorm = 1.9292, lr_0 = 6.0674e-04
Loss = 4.3463e-01, PNorm = 52.8521, GNorm = 1.1684, lr_0 = 6.0633e-04
Loss = 4.7787e-01, PNorm = 52.8584, GNorm = 1.8455, lr_0 = 6.0591e-04
Loss = 4.5944e-01, PNorm = 52.8724, GNorm = 1.5678, lr_0 = 6.0550e-04
Loss = 4.8511e-01, PNorm = 52.8871, GNorm = 1.7681, lr_0 = 6.0508e-04
Loss = 4.4352e-01, PNorm = 52.9040, GNorm = 1.3210, lr_0 = 6.0467e-04
Loss = 4.3714e-01, PNorm = 52.9253, GNorm = 1.7801, lr_0 = 6.0425e-04
Loss = 4.2376e-01, PNorm = 52.9350, GNorm = 1.1842, lr_0 = 6.0384e-04
Loss = 3.9753e-01, PNorm = 52.9509, GNorm = 1.4593, lr_0 = 6.0343e-04
Loss = 4.3582e-01, PNorm = 52.9646, GNorm = 1.1582, lr_0 = 6.0301e-04
Loss = 4.2283e-01, PNorm = 52.9842, GNorm = 0.8733, lr_0 = 6.0260e-04
Loss = 4.6871e-01, PNorm = 52.9982, GNorm = 1.3823, lr_0 = 6.0219e-04
Loss = 5.1677e-01, PNorm = 53.0125, GNorm = 2.9120, lr_0 = 6.0178e-04
Loss = 4.5269e-01, PNorm = 53.0184, GNorm = 1.1006, lr_0 = 6.0136e-04
Loss = 4.2620e-01, PNorm = 53.0209, GNorm = 1.6978, lr_0 = 6.0095e-04
Loss = 4.3477e-01, PNorm = 53.0315, GNorm = 1.0268, lr_0 = 6.0054e-04
Loss = 4.4679e-01, PNorm = 53.0462, GNorm = 1.2995, lr_0 = 6.0013e-04
Loss = 4.2619e-01, PNorm = 53.0595, GNorm = 1.6096, lr_0 = 5.9972e-04
Loss = 4.2478e-01, PNorm = 53.0711, GNorm = 1.3073, lr_0 = 5.9931e-04
Loss = 4.3419e-01, PNorm = 53.0855, GNorm = 1.1338, lr_0 = 5.9890e-04
Loss = 4.1115e-01, PNorm = 53.0964, GNorm = 1.4208, lr_0 = 5.9849e-04
Loss = 4.3016e-01, PNorm = 53.1122, GNorm = 1.5542, lr_0 = 5.9808e-04
Loss = 3.9451e-01, PNorm = 53.1220, GNorm = 1.2960, lr_0 = 5.9767e-04
Loss = 4.1133e-01, PNorm = 53.1344, GNorm = 1.2023, lr_0 = 5.9726e-04
Loss = 3.9579e-01, PNorm = 53.1445, GNorm = 1.7410, lr_0 = 5.9685e-04
Loss = 4.3734e-01, PNorm = 53.1555, GNorm = 1.1444, lr_0 = 5.9644e-04
Loss = 4.2494e-01, PNorm = 53.1732, GNorm = 1.2291, lr_0 = 5.9603e-04
Loss = 3.5883e-01, PNorm = 53.1864, GNorm = 2.0918, lr_0 = 5.9562e-04
Loss = 3.8707e-01, PNorm = 53.2031, GNorm = 0.8974, lr_0 = 5.9521e-04
Loss = 4.7479e-01, PNorm = 53.2119, GNorm = 1.7669, lr_0 = 5.9481e-04
Loss = 4.2848e-01, PNorm = 53.2239, GNorm = 1.2877, lr_0 = 5.9440e-04
Loss = 4.1828e-01, PNorm = 53.2329, GNorm = 1.3573, lr_0 = 5.9399e-04
Loss = 4.9506e-01, PNorm = 53.2474, GNorm = 1.4877, lr_0 = 5.9358e-04
Loss = 4.8320e-01, PNorm = 53.2601, GNorm = 1.5121, lr_0 = 5.9318e-04
Loss = 3.9678e-01, PNorm = 53.2686, GNorm = 1.5086, lr_0 = 5.9277e-04
Loss = 3.7870e-01, PNorm = 53.2829, GNorm = 1.3645, lr_0 = 5.9236e-04
Loss = 4.7328e-01, PNorm = 53.2912, GNorm = 2.5229, lr_0 = 5.9196e-04
Loss = 4.2379e-01, PNorm = 53.3088, GNorm = 1.0572, lr_0 = 5.9155e-04
Loss = 4.5876e-01, PNorm = 53.3169, GNorm = 1.1757, lr_0 = 5.9115e-04
Loss = 4.4857e-01, PNorm = 53.3280, GNorm = 1.7137, lr_0 = 5.9074e-04
Loss = 3.8969e-01, PNorm = 53.3426, GNorm = 0.8560, lr_0 = 5.9034e-04
Loss = 4.2665e-01, PNorm = 53.3519, GNorm = 1.4798, lr_0 = 5.8993e-04
Loss = 4.6554e-01, PNorm = 53.3610, GNorm = 1.3401, lr_0 = 5.8953e-04
Loss = 4.4416e-01, PNorm = 53.3637, GNorm = 0.8454, lr_0 = 5.8913e-04
Loss = 4.7900e-01, PNorm = 53.3747, GNorm = 0.8703, lr_0 = 5.8872e-04
Loss = 4.7233e-01, PNorm = 53.3866, GNorm = 1.4077, lr_0 = 5.8832e-04
Loss = 4.1578e-01, PNorm = 53.4077, GNorm = 1.7009, lr_0 = 5.8792e-04
Loss = 4.1892e-01, PNorm = 53.4164, GNorm = 1.3175, lr_0 = 5.8751e-04
Loss = 4.1893e-01, PNorm = 53.4262, GNorm = 2.4775, lr_0 = 5.8711e-04
Loss = 4.1102e-01, PNorm = 53.4393, GNorm = 1.5726, lr_0 = 5.8671e-04
Loss = 4.4820e-01, PNorm = 53.4497, GNorm = 2.0568, lr_0 = 5.8631e-04
Loss = 4.4488e-01, PNorm = 53.4586, GNorm = 1.3535, lr_0 = 5.8591e-04
Loss = 4.4912e-01, PNorm = 53.4718, GNorm = 1.7527, lr_0 = 5.8550e-04
Loss = 4.0653e-01, PNorm = 53.4863, GNorm = 1.2782, lr_0 = 5.8510e-04
Loss = 4.3235e-01, PNorm = 53.4976, GNorm = 2.1166, lr_0 = 5.8470e-04
Loss = 3.9987e-01, PNorm = 53.5082, GNorm = 1.4667, lr_0 = 5.8430e-04
Loss = 4.0682e-01, PNorm = 53.5185, GNorm = 1.0707, lr_0 = 5.8390e-04
Loss = 4.0580e-01, PNorm = 53.5317, GNorm = 1.4024, lr_0 = 5.8350e-04
Loss = 4.1526e-01, PNorm = 53.5443, GNorm = 1.7318, lr_0 = 5.8310e-04
Loss = 4.4646e-01, PNorm = 53.5581, GNorm = 1.2437, lr_0 = 5.8270e-04
Loss = 4.4067e-01, PNorm = 53.5660, GNorm = 1.3923, lr_0 = 5.8230e-04
Loss = 4.1284e-01, PNorm = 53.5803, GNorm = 1.8651, lr_0 = 5.8190e-04
Loss = 4.2687e-01, PNorm = 53.5828, GNorm = 1.2979, lr_0 = 5.8151e-04
Loss = 4.4095e-01, PNorm = 53.5915, GNorm = 0.8681, lr_0 = 5.8111e-04
Loss = 4.0007e-01, PNorm = 53.6022, GNorm = 0.8640, lr_0 = 5.8071e-04
Loss = 4.0222e-01, PNorm = 53.6140, GNorm = 1.4994, lr_0 = 5.8031e-04
Loss = 3.9505e-01, PNorm = 53.6270, GNorm = 1.5030, lr_0 = 5.7991e-04
Loss = 4.0406e-01, PNorm = 53.6325, GNorm = 1.1852, lr_0 = 5.7952e-04
Loss = 3.9512e-01, PNorm = 53.6420, GNorm = 1.5819, lr_0 = 5.7912e-04
Loss = 4.0979e-01, PNorm = 53.6517, GNorm = 0.9568, lr_0 = 5.7872e-04
Loss = 3.7075e-01, PNorm = 53.6585, GNorm = 1.2711, lr_0 = 5.7833e-04
Loss = 4.4462e-01, PNorm = 53.6722, GNorm = 1.5469, lr_0 = 5.7793e-04
Loss = 4.4064e-01, PNorm = 53.6783, GNorm = 1.8225, lr_0 = 5.7753e-04
Loss = 4.3343e-01, PNorm = 53.6870, GNorm = 1.7025, lr_0 = 5.7714e-04
Loss = 4.2840e-01, PNorm = 53.6960, GNorm = 1.2931, lr_0 = 5.7674e-04
Loss = 4.3828e-01, PNorm = 53.7060, GNorm = 1.1084, lr_0 = 5.7635e-04
Loss = 4.4228e-01, PNorm = 53.7135, GNorm = 1.2842, lr_0 = 5.7595e-04
Loss = 4.0806e-01, PNorm = 53.7213, GNorm = 0.9908, lr_0 = 5.7556e-04
Loss = 4.4360e-01, PNorm = 53.7263, GNorm = 2.0714, lr_0 = 5.7516e-04
Loss = 4.1486e-01, PNorm = 53.7301, GNorm = 1.8807, lr_0 = 5.7477e-04
Loss = 4.2574e-01, PNorm = 53.7426, GNorm = 1.3704, lr_0 = 5.7438e-04
Loss = 5.4204e-01, PNorm = 53.7534, GNorm = 1.2829, lr_0 = 5.7398e-04
Loss = 4.0489e-01, PNorm = 53.7590, GNorm = 0.8874, lr_0 = 5.7359e-04
Loss = 4.3570e-01, PNorm = 53.7691, GNorm = 1.1336, lr_0 = 5.7320e-04
Loss = 4.6512e-01, PNorm = 53.7809, GNorm = 1.3160, lr_0 = 5.7280e-04
Loss = 4.4442e-01, PNorm = 53.7910, GNorm = 1.4665, lr_0 = 5.7241e-04
Loss = 4.4317e-01, PNorm = 53.7977, GNorm = 2.3673, lr_0 = 5.7202e-04
Loss = 4.2054e-01, PNorm = 53.8050, GNorm = 1.3641, lr_0 = 5.7163e-04
Loss = 4.2633e-01, PNorm = 53.8192, GNorm = 1.2419, lr_0 = 5.7124e-04
Loss = 5.0562e-01, PNorm = 53.8296, GNorm = 1.2609, lr_0 = 5.7084e-04
Loss = 4.4973e-01, PNorm = 53.8445, GNorm = 1.6849, lr_0 = 5.7045e-04
Loss = 4.2072e-01, PNorm = 53.8600, GNorm = 1.3215, lr_0 = 5.7006e-04
Loss = 4.2684e-01, PNorm = 53.8761, GNorm = 1.0099, lr_0 = 5.6967e-04
Loss = 3.8138e-01, PNorm = 53.8859, GNorm = 1.7330, lr_0 = 5.6928e-04
Loss = 3.8604e-01, PNorm = 53.8984, GNorm = 1.4180, lr_0 = 5.6889e-04
Loss = 4.2868e-01, PNorm = 53.9052, GNorm = 1.3624, lr_0 = 5.6850e-04
Loss = 5.3222e-01, PNorm = 53.9090, GNorm = 1.2509, lr_0 = 5.6811e-04
Loss = 4.1270e-01, PNorm = 53.9134, GNorm = 1.1154, lr_0 = 5.6772e-04
Loss = 4.4189e-01, PNorm = 53.9233, GNorm = 2.8662, lr_0 = 5.6733e-04
Loss = 4.5912e-01, PNorm = 53.9339, GNorm = 0.9931, lr_0 = 5.6695e-04
Loss = 4.3730e-01, PNorm = 53.9408, GNorm = 0.9695, lr_0 = 5.6656e-04
Loss = 4.0440e-01, PNorm = 53.9540, GNorm = 1.4644, lr_0 = 5.6617e-04
Loss = 4.6829e-01, PNorm = 53.9647, GNorm = 1.6171, lr_0 = 5.6578e-04
Loss = 4.2572e-01, PNorm = 53.9810, GNorm = 1.2187, lr_0 = 5.6539e-04
Loss = 3.9246e-01, PNorm = 53.9887, GNorm = 1.5761, lr_0 = 5.6501e-04
Loss = 4.1789e-01, PNorm = 53.9976, GNorm = 1.3426, lr_0 = 5.6462e-04
Loss = 4.2830e-01, PNorm = 54.0057, GNorm = 1.6264, lr_0 = 5.6423e-04
Loss = 4.4210e-01, PNorm = 54.0138, GNorm = 2.1623, lr_0 = 5.6385e-04
Loss = 3.8746e-01, PNorm = 54.0197, GNorm = 1.2048, lr_0 = 5.6346e-04
Loss = 4.5949e-01, PNorm = 54.0276, GNorm = 1.7085, lr_0 = 5.6307e-04
Loss = 4.3714e-01, PNorm = 54.0402, GNorm = 1.4717, lr_0 = 5.6269e-04
Loss = 4.3853e-01, PNorm = 54.0451, GNorm = 1.3388, lr_0 = 5.6230e-04
Validation mae = 0.115292
Epoch 9
Loss = 4.2855e-01, PNorm = 54.0586, GNorm = 1.0802, lr_0 = 5.6192e-04
Loss = 4.2369e-01, PNorm = 54.0725, GNorm = 1.3152, lr_0 = 5.6153e-04
Loss = 4.0054e-01, PNorm = 54.0904, GNorm = 1.1312, lr_0 = 5.6115e-04
Loss = 4.1861e-01, PNorm = 54.0998, GNorm = 1.1585, lr_0 = 5.6076e-04
Loss = 3.9762e-01, PNorm = 54.1091, GNorm = 1.7798, lr_0 = 5.6038e-04
Loss = 4.3446e-01, PNorm = 54.1189, GNorm = 1.4900, lr_0 = 5.6000e-04
Loss = 4.2310e-01, PNorm = 54.1280, GNorm = 1.3673, lr_0 = 5.5961e-04
Loss = 4.4012e-01, PNorm = 54.1371, GNorm = 1.1218, lr_0 = 5.5923e-04
Loss = 4.2075e-01, PNorm = 54.1483, GNorm = 1.8447, lr_0 = 5.5885e-04
Loss = 4.0149e-01, PNorm = 54.1594, GNorm = 1.1750, lr_0 = 5.5846e-04
Loss = 3.8298e-01, PNorm = 54.1740, GNorm = 1.0808, lr_0 = 5.5808e-04
Loss = 3.9536e-01, PNorm = 54.1816, GNorm = 1.8684, lr_0 = 5.5770e-04
Loss = 3.7177e-01, PNorm = 54.1929, GNorm = 1.2929, lr_0 = 5.5732e-04
Loss = 4.2622e-01, PNorm = 54.1998, GNorm = 1.8016, lr_0 = 5.5693e-04
Loss = 4.0629e-01, PNorm = 54.2144, GNorm = 2.1047, lr_0 = 5.5655e-04
Loss = 4.2639e-01, PNorm = 54.2211, GNorm = 2.1980, lr_0 = 5.5617e-04
Loss = 4.5672e-01, PNorm = 54.2301, GNorm = 2.1433, lr_0 = 5.5579e-04
Loss = 4.0336e-01, PNorm = 54.2440, GNorm = 0.9415, lr_0 = 5.5541e-04
Loss = 3.8154e-01, PNorm = 54.2553, GNorm = 1.3702, lr_0 = 5.5503e-04
Loss = 4.1651e-01, PNorm = 54.2632, GNorm = 1.0069, lr_0 = 5.5465e-04
Loss = 4.6552e-01, PNorm = 54.2667, GNorm = 1.2668, lr_0 = 5.5427e-04
Loss = 4.3168e-01, PNorm = 54.2768, GNorm = 1.1328, lr_0 = 5.5389e-04
Loss = 4.2049e-01, PNorm = 54.2897, GNorm = 1.9654, lr_0 = 5.5351e-04
Loss = 4.4521e-01, PNorm = 54.3012, GNorm = 1.8976, lr_0 = 5.5313e-04
Loss = 3.9032e-01, PNorm = 54.3143, GNorm = 1.3193, lr_0 = 5.5275e-04
Loss = 4.1677e-01, PNorm = 54.3277, GNorm = 1.5350, lr_0 = 5.5237e-04
Loss = 3.9896e-01, PNorm = 54.3389, GNorm = 1.1198, lr_0 = 5.5199e-04
Loss = 3.9815e-01, PNorm = 54.3472, GNorm = 1.8816, lr_0 = 5.5162e-04
Loss = 3.7272e-01, PNorm = 54.3601, GNorm = 1.0655, lr_0 = 5.5124e-04
Loss = 4.4076e-01, PNorm = 54.3726, GNorm = 1.5471, lr_0 = 5.5086e-04
Loss = 4.5268e-01, PNorm = 54.3785, GNorm = 1.4550, lr_0 = 5.5048e-04
Loss = 4.7294e-01, PNorm = 54.3906, GNorm = 1.2696, lr_0 = 5.5011e-04
Loss = 4.1589e-01, PNorm = 54.4005, GNorm = 1.6546, lr_0 = 5.4973e-04
Loss = 4.3605e-01, PNorm = 54.4080, GNorm = 1.1866, lr_0 = 5.4935e-04
Loss = 4.3522e-01, PNorm = 54.4145, GNorm = 1.2889, lr_0 = 5.4898e-04
Loss = 4.2741e-01, PNorm = 54.4271, GNorm = 1.0698, lr_0 = 5.4860e-04
Loss = 4.2850e-01, PNorm = 54.4377, GNorm = 1.3537, lr_0 = 5.4822e-04
Loss = 4.0655e-01, PNorm = 54.4516, GNorm = 1.2970, lr_0 = 5.4785e-04
Loss = 4.1628e-01, PNorm = 54.4614, GNorm = 1.2624, lr_0 = 5.4747e-04
Loss = 4.7240e-01, PNorm = 54.4712, GNorm = 1.9167, lr_0 = 5.4710e-04
Loss = 4.4631e-01, PNorm = 54.4824, GNorm = 1.1356, lr_0 = 5.4672e-04
Loss = 4.3429e-01, PNorm = 54.4987, GNorm = 1.5552, lr_0 = 5.4635e-04
Loss = 3.8890e-01, PNorm = 54.5118, GNorm = 1.1919, lr_0 = 5.4597e-04
Loss = 4.8849e-01, PNorm = 54.5237, GNorm = 1.4545, lr_0 = 5.4560e-04
Loss = 3.7735e-01, PNorm = 54.5319, GNorm = 1.1363, lr_0 = 5.4523e-04
Loss = 3.8808e-01, PNorm = 54.5396, GNorm = 1.4343, lr_0 = 5.4485e-04
Loss = 3.8408e-01, PNorm = 54.5405, GNorm = 1.4782, lr_0 = 5.4448e-04
Loss = 3.9919e-01, PNorm = 54.5437, GNorm = 1.4260, lr_0 = 5.4411e-04
Loss = 4.1475e-01, PNorm = 54.5579, GNorm = 1.1453, lr_0 = 5.4373e-04
Loss = 3.8499e-01, PNorm = 54.5645, GNorm = 1.9262, lr_0 = 5.4336e-04
Loss = 4.2089e-01, PNorm = 54.5786, GNorm = 1.3882, lr_0 = 5.4299e-04
Loss = 3.7398e-01, PNorm = 54.5869, GNorm = 1.5411, lr_0 = 5.4262e-04
Loss = 4.5432e-01, PNorm = 54.5949, GNorm = 0.9321, lr_0 = 5.4225e-04
Loss = 4.3356e-01, PNorm = 54.6046, GNorm = 1.2570, lr_0 = 5.4187e-04
Loss = 4.3907e-01, PNorm = 54.6139, GNorm = 1.1501, lr_0 = 5.4150e-04
Loss = 4.2700e-01, PNorm = 54.6250, GNorm = 1.2724, lr_0 = 5.4113e-04
Loss = 4.5523e-01, PNorm = 54.6394, GNorm = 1.9758, lr_0 = 5.4076e-04
Loss = 4.3303e-01, PNorm = 54.6569, GNorm = 1.7027, lr_0 = 5.4039e-04
Loss = 3.9822e-01, PNorm = 54.6710, GNorm = 1.0891, lr_0 = 5.4002e-04
Loss = 4.3303e-01, PNorm = 54.6848, GNorm = 1.1757, lr_0 = 5.3965e-04
Loss = 4.6136e-01, PNorm = 54.6973, GNorm = 1.1603, lr_0 = 5.3928e-04
Loss = 4.5440e-01, PNorm = 54.7059, GNorm = 1.4414, lr_0 = 5.3891e-04
Loss = 4.1056e-01, PNorm = 54.7119, GNorm = 1.2629, lr_0 = 5.3854e-04
Loss = 4.2819e-01, PNorm = 54.7200, GNorm = 1.5946, lr_0 = 5.3817e-04
Loss = 4.3240e-01, PNorm = 54.7293, GNorm = 1.5649, lr_0 = 5.3781e-04
Loss = 3.8946e-01, PNorm = 54.7423, GNorm = 1.7258, lr_0 = 5.3744e-04
Loss = 4.7771e-01, PNorm = 54.7522, GNorm = 1.4420, lr_0 = 5.3707e-04
Loss = 4.3171e-01, PNorm = 54.7673, GNorm = 2.0601, lr_0 = 5.3670e-04
Loss = 4.2562e-01, PNorm = 54.7769, GNorm = 1.8228, lr_0 = 5.3633e-04
Loss = 4.5754e-01, PNorm = 54.7879, GNorm = 2.1245, lr_0 = 5.3597e-04
Loss = 4.0844e-01, PNorm = 54.8013, GNorm = 1.2389, lr_0 = 5.3560e-04
Loss = 3.8653e-01, PNorm = 54.8110, GNorm = 0.9200, lr_0 = 5.3523e-04
Loss = 4.1106e-01, PNorm = 54.8205, GNorm = 1.8052, lr_0 = 5.3486e-04
Loss = 4.7340e-01, PNorm = 54.8267, GNorm = 1.3370, lr_0 = 5.3450e-04
Loss = 4.3179e-01, PNorm = 54.8378, GNorm = 1.5090, lr_0 = 5.3413e-04
Loss = 4.6973e-01, PNorm = 54.8431, GNorm = 0.8798, lr_0 = 5.3377e-04
Loss = 4.6454e-01, PNorm = 54.8539, GNorm = 1.5867, lr_0 = 5.3340e-04
Loss = 4.5581e-01, PNorm = 54.8633, GNorm = 1.0875, lr_0 = 5.3304e-04
Loss = 4.1366e-01, PNorm = 54.8700, GNorm = 1.5238, lr_0 = 5.3267e-04
Loss = 4.7305e-01, PNorm = 54.8689, GNorm = 1.2058, lr_0 = 5.3231e-04
Loss = 4.3587e-01, PNorm = 54.8825, GNorm = 1.4340, lr_0 = 5.3194e-04
Loss = 4.2536e-01, PNorm = 54.8956, GNorm = 1.2077, lr_0 = 5.3158e-04
Loss = 4.4303e-01, PNorm = 54.9045, GNorm = 1.3471, lr_0 = 5.3121e-04
Loss = 4.3529e-01, PNorm = 54.9206, GNorm = 1.4970, lr_0 = 5.3085e-04
Loss = 3.9329e-01, PNorm = 54.9296, GNorm = 1.1491, lr_0 = 5.3048e-04
Loss = 4.6170e-01, PNorm = 54.9424, GNorm = 1.0990, lr_0 = 5.3012e-04
Loss = 4.1086e-01, PNorm = 54.9574, GNorm = 1.3058, lr_0 = 5.2976e-04
Loss = 4.0927e-01, PNorm = 54.9666, GNorm = 1.0526, lr_0 = 5.2939e-04
Loss = 4.2611e-01, PNorm = 54.9747, GNorm = 1.3688, lr_0 = 5.2903e-04
Loss = 4.3362e-01, PNorm = 54.9848, GNorm = 1.8076, lr_0 = 5.2867e-04
Loss = 4.3509e-01, PNorm = 54.9933, GNorm = 1.1038, lr_0 = 5.2831e-04
Loss = 4.9191e-01, PNorm = 55.0043, GNorm = 1.5883, lr_0 = 5.2795e-04
Loss = 4.0235e-01, PNorm = 55.0142, GNorm = 5.3813, lr_0 = 5.2758e-04
Loss = 3.8288e-01, PNorm = 55.0244, GNorm = 1.4558, lr_0 = 5.2722e-04
Loss = 3.7063e-01, PNorm = 55.0319, GNorm = 1.1114, lr_0 = 5.2686e-04
Loss = 5.4155e-01, PNorm = 55.0372, GNorm = 2.4406, lr_0 = 5.2650e-04
Loss = 3.9924e-01, PNorm = 55.0514, GNorm = 2.2321, lr_0 = 5.2614e-04
Loss = 5.1894e-01, PNorm = 55.0628, GNorm = 2.2214, lr_0 = 5.2578e-04
Loss = 4.3984e-01, PNorm = 55.0767, GNorm = 1.1147, lr_0 = 5.2542e-04
Loss = 4.0117e-01, PNorm = 55.0878, GNorm = 1.4295, lr_0 = 5.2506e-04
Loss = 4.5462e-01, PNorm = 55.1020, GNorm = 1.9033, lr_0 = 5.2470e-04
Loss = 3.9771e-01, PNorm = 55.1120, GNorm = 1.6611, lr_0 = 5.2434e-04
Loss = 3.9977e-01, PNorm = 55.1191, GNorm = 1.2194, lr_0 = 5.2398e-04
Loss = 3.6283e-01, PNorm = 55.1278, GNorm = 1.1514, lr_0 = 5.2362e-04
Loss = 4.3377e-01, PNorm = 55.1378, GNorm = 1.3295, lr_0 = 5.2326e-04
Loss = 4.2267e-01, PNorm = 55.1440, GNorm = 2.5716, lr_0 = 5.2290e-04
Loss = 4.0248e-01, PNorm = 55.1515, GNorm = 1.2146, lr_0 = 5.2255e-04
Loss = 4.0932e-01, PNorm = 55.1586, GNorm = 0.9716, lr_0 = 5.2219e-04
Loss = 3.6731e-01, PNorm = 55.1725, GNorm = 0.9188, lr_0 = 5.2183e-04
Loss = 3.9022e-01, PNorm = 55.1826, GNorm = 1.2252, lr_0 = 5.2147e-04
Loss = 4.7026e-01, PNorm = 55.1923, GNorm = 1.3606, lr_0 = 5.2112e-04
Loss = 4.9848e-01, PNorm = 55.2007, GNorm = 1.5629, lr_0 = 5.2076e-04
Loss = 4.3484e-01, PNorm = 55.2145, GNorm = 1.1734, lr_0 = 5.2040e-04
Loss = 4.3880e-01, PNorm = 55.2227, GNorm = 1.3649, lr_0 = 5.2005e-04
Loss = 4.2382e-01, PNorm = 55.2426, GNorm = 1.4683, lr_0 = 5.1969e-04
Loss = 4.1812e-01, PNorm = 55.2525, GNorm = 1.4648, lr_0 = 5.1933e-04
Loss = 3.7593e-01, PNorm = 55.2610, GNorm = 1.0801, lr_0 = 5.1898e-04
Loss = 4.0634e-01, PNorm = 55.2651, GNorm = 1.6295, lr_0 = 5.1862e-04
Loss = 4.3075e-01, PNorm = 55.2703, GNorm = 1.6689, lr_0 = 5.1827e-04
Loss = 4.6138e-01, PNorm = 55.2825, GNorm = 1.9563, lr_0 = 5.1791e-04
Validation mae = 0.114636
Epoch 10
Loss = 4.2654e-01, PNorm = 55.2913, GNorm = 1.7462, lr_0 = 5.1756e-04
Loss = 4.4615e-01, PNorm = 55.2960, GNorm = 1.6828, lr_0 = 5.1720e-04
Loss = 4.0975e-01, PNorm = 55.3001, GNorm = 1.0606, lr_0 = 5.1685e-04
Loss = 4.2930e-01, PNorm = 55.3113, GNorm = 1.2634, lr_0 = 5.1649e-04
Loss = 4.3264e-01, PNorm = 55.3245, GNorm = 1.3497, lr_0 = 5.1614e-04
Loss = 4.4456e-01, PNorm = 55.3421, GNorm = 1.3981, lr_0 = 5.1579e-04
Loss = 4.7231e-01, PNorm = 55.3477, GNorm = 1.5891, lr_0 = 5.1543e-04
Loss = 4.1904e-01, PNorm = 55.3565, GNorm = 1.8456, lr_0 = 5.1508e-04
Loss = 3.7756e-01, PNorm = 55.3704, GNorm = 1.4195, lr_0 = 5.1473e-04
Loss = 4.3647e-01, PNorm = 55.3865, GNorm = 1.1651, lr_0 = 5.1437e-04
Loss = 3.7029e-01, PNorm = 55.3952, GNorm = 1.8396, lr_0 = 5.1402e-04
Loss = 4.2724e-01, PNorm = 55.4070, GNorm = 1.4620, lr_0 = 5.1367e-04
Loss = 4.1905e-01, PNorm = 55.4220, GNorm = 1.6126, lr_0 = 5.1332e-04
Loss = 4.2257e-01, PNorm = 55.4288, GNorm = 0.9848, lr_0 = 5.1297e-04
Loss = 4.1358e-01, PNorm = 55.4330, GNorm = 1.6740, lr_0 = 5.1262e-04
Loss = 4.3259e-01, PNorm = 55.4388, GNorm = 1.6536, lr_0 = 5.1226e-04
Loss = 4.0552e-01, PNorm = 55.4511, GNorm = 1.6962, lr_0 = 5.1191e-04
Loss = 3.8807e-01, PNorm = 55.4570, GNorm = 2.1851, lr_0 = 5.1156e-04
Loss = 4.6040e-01, PNorm = 55.4668, GNorm = 1.2925, lr_0 = 5.1121e-04
Loss = 4.5201e-01, PNorm = 55.4818, GNorm = 1.0006, lr_0 = 5.1086e-04
Loss = 4.2843e-01, PNorm = 55.4941, GNorm = 1.8485, lr_0 = 5.1051e-04
Loss = 3.8256e-01, PNorm = 55.4985, GNorm = 1.5098, lr_0 = 5.1016e-04
Loss = 3.7781e-01, PNorm = 55.5064, GNorm = 1.2018, lr_0 = 5.0981e-04
Loss = 4.1084e-01, PNorm = 55.5183, GNorm = 1.2794, lr_0 = 5.0946e-04
Loss = 3.8163e-01, PNorm = 55.5334, GNorm = 1.6365, lr_0 = 5.0911e-04
Loss = 4.5072e-01, PNorm = 55.5416, GNorm = 1.7572, lr_0 = 5.0877e-04
Loss = 4.2730e-01, PNorm = 55.5554, GNorm = 1.2474, lr_0 = 5.0842e-04
Loss = 3.6661e-01, PNorm = 55.5622, GNorm = 0.9501, lr_0 = 5.0807e-04
Loss = 3.7719e-01, PNorm = 55.5697, GNorm = 1.0334, lr_0 = 5.0772e-04
Loss = 4.2438e-01, PNorm = 55.5747, GNorm = 1.1301, lr_0 = 5.0737e-04
Loss = 3.4546e-01, PNorm = 55.5869, GNorm = 1.2170, lr_0 = 5.0703e-04
Loss = 4.1888e-01, PNorm = 55.5893, GNorm = 2.0616, lr_0 = 5.0668e-04
Loss = 4.2065e-01, PNorm = 55.6011, GNorm = 1.1281, lr_0 = 5.0633e-04
Loss = 4.7470e-01, PNorm = 55.6146, GNorm = 2.1645, lr_0 = 5.0598e-04
Loss = 4.0955e-01, PNorm = 55.6213, GNorm = 0.8829, lr_0 = 5.0564e-04
Loss = 4.1032e-01, PNorm = 55.6291, GNorm = 1.3362, lr_0 = 5.0529e-04
Loss = 3.9590e-01, PNorm = 55.6393, GNorm = 1.1130, lr_0 = 5.0494e-04
Loss = 4.1979e-01, PNorm = 55.6475, GNorm = 1.2217, lr_0 = 5.0460e-04
Loss = 3.9137e-01, PNorm = 55.6597, GNorm = 1.5588, lr_0 = 5.0425e-04
Loss = 5.0908e-01, PNorm = 55.6696, GNorm = 1.9574, lr_0 = 5.0391e-04
Loss = 3.8639e-01, PNorm = 55.6805, GNorm = 0.9567, lr_0 = 5.0356e-04
Loss = 4.0540e-01, PNorm = 55.6847, GNorm = 1.6152, lr_0 = 5.0322e-04
Loss = 4.1982e-01, PNorm = 55.7023, GNorm = 1.4973, lr_0 = 5.0287e-04
Loss = 4.3621e-01, PNorm = 55.7123, GNorm = 1.6903, lr_0 = 5.0253e-04
Loss = 4.0200e-01, PNorm = 55.7209, GNorm = 1.1687, lr_0 = 5.0218e-04
Loss = 3.6400e-01, PNorm = 55.7343, GNorm = 0.9033, lr_0 = 5.0184e-04
Loss = 4.6973e-01, PNorm = 55.7507, GNorm = 1.4213, lr_0 = 5.0150e-04
Loss = 4.3200e-01, PNorm = 55.7678, GNorm = 1.2852, lr_0 = 5.0115e-04
Loss = 4.5449e-01, PNorm = 55.7770, GNorm = 1.8261, lr_0 = 5.0081e-04
Loss = 3.6277e-01, PNorm = 55.7915, GNorm = 1.3180, lr_0 = 5.0047e-04
Loss = 4.1344e-01, PNorm = 55.8000, GNorm = 1.6692, lr_0 = 5.0012e-04
Loss = 3.8999e-01, PNorm = 55.8125, GNorm = 1.0267, lr_0 = 4.9978e-04
Loss = 3.7305e-01, PNorm = 55.8255, GNorm = 1.2395, lr_0 = 4.9944e-04
Loss = 4.0260e-01, PNorm = 55.8349, GNorm = 1.4706, lr_0 = 4.9910e-04
Loss = 4.2550e-01, PNorm = 55.8395, GNorm = 1.0951, lr_0 = 4.9875e-04
Loss = 3.8074e-01, PNorm = 55.8431, GNorm = 1.1240, lr_0 = 4.9841e-04
Loss = 3.9859e-01, PNorm = 55.8477, GNorm = 1.0535, lr_0 = 4.9807e-04
Loss = 4.0891e-01, PNorm = 55.8576, GNorm = 1.3587, lr_0 = 4.9773e-04
Loss = 4.6325e-01, PNorm = 55.8684, GNorm = 1.3242, lr_0 = 4.9739e-04
Loss = 3.8977e-01, PNorm = 55.8729, GNorm = 1.8207, lr_0 = 4.9705e-04
Loss = 3.9297e-01, PNorm = 55.8775, GNorm = 1.3217, lr_0 = 4.9671e-04
Loss = 4.2646e-01, PNorm = 55.8893, GNorm = 1.1233, lr_0 = 4.9637e-04
Loss = 4.0714e-01, PNorm = 55.9011, GNorm = 1.1509, lr_0 = 4.9603e-04
Loss = 4.2612e-01, PNorm = 55.9105, GNorm = 1.4607, lr_0 = 4.9569e-04
Loss = 4.0242e-01, PNorm = 55.9217, GNorm = 0.9992, lr_0 = 4.9535e-04
Loss = 4.1683e-01, PNorm = 55.9326, GNorm = 1.1545, lr_0 = 4.9501e-04
Loss = 3.9526e-01, PNorm = 55.9423, GNorm = 1.2482, lr_0 = 4.9467e-04
Loss = 3.9327e-01, PNorm = 55.9490, GNorm = 1.4646, lr_0 = 4.9433e-04
Loss = 3.6907e-01, PNorm = 55.9581, GNorm = 1.9332, lr_0 = 4.9399e-04
Loss = 5.1272e-01, PNorm = 55.9663, GNorm = 1.8608, lr_0 = 4.9365e-04
Loss = 3.8526e-01, PNorm = 55.9762, GNorm = 1.5242, lr_0 = 4.9332e-04
Loss = 3.7313e-01, PNorm = 55.9806, GNorm = 1.1789, lr_0 = 4.9298e-04
Loss = 4.0574e-01, PNorm = 55.9882, GNorm = 1.9211, lr_0 = 4.9264e-04
Loss = 4.4093e-01, PNorm = 55.9985, GNorm = 1.6490, lr_0 = 4.9230e-04
Loss = 4.1420e-01, PNorm = 56.0072, GNorm = 1.2350, lr_0 = 4.9197e-04
Loss = 3.8413e-01, PNorm = 56.0185, GNorm = 1.0194, lr_0 = 4.9163e-04
Loss = 4.3908e-01, PNorm = 56.0301, GNorm = 1.2549, lr_0 = 4.9129e-04
Loss = 4.0516e-01, PNorm = 56.0381, GNorm = 2.3281, lr_0 = 4.9095e-04
Loss = 3.9968e-01, PNorm = 56.0404, GNorm = 1.2254, lr_0 = 4.9062e-04
Loss = 4.2598e-01, PNorm = 56.0473, GNorm = 1.5141, lr_0 = 4.9028e-04
Loss = 3.7522e-01, PNorm = 56.0546, GNorm = 1.1135, lr_0 = 4.8995e-04
Loss = 5.2987e-01, PNorm = 56.0654, GNorm = 1.6341, lr_0 = 4.8961e-04
Loss = 4.7383e-01, PNorm = 56.0718, GNorm = 1.0938, lr_0 = 4.8928e-04
Loss = 4.3495e-01, PNorm = 56.0830, GNorm = 1.4674, lr_0 = 4.8894e-04
Loss = 3.8204e-01, PNorm = 56.0983, GNorm = 1.2597, lr_0 = 4.8861e-04
Loss = 4.2705e-01, PNorm = 56.1087, GNorm = 1.1926, lr_0 = 4.8827e-04
Loss = 4.1095e-01, PNorm = 56.1149, GNorm = 1.2610, lr_0 = 4.8794e-04
Loss = 3.7044e-01, PNorm = 56.1220, GNorm = 1.3878, lr_0 = 4.8760e-04
Loss = 4.6934e-01, PNorm = 56.1189, GNorm = 1.5311, lr_0 = 4.8727e-04
Loss = 4.0535e-01, PNorm = 56.1299, GNorm = 1.1205, lr_0 = 4.8693e-04
Loss = 4.2253e-01, PNorm = 56.1387, GNorm = 0.9952, lr_0 = 4.8660e-04
Loss = 4.2274e-01, PNorm = 56.1504, GNorm = 1.5270, lr_0 = 4.8627e-04
Loss = 3.7874e-01, PNorm = 56.1555, GNorm = 1.4949, lr_0 = 4.8593e-04
Loss = 4.6638e-01, PNorm = 56.1658, GNorm = 1.6416, lr_0 = 4.8560e-04
Loss = 4.1648e-01, PNorm = 56.1760, GNorm = 1.1657, lr_0 = 4.8527e-04
Loss = 4.4396e-01, PNorm = 56.1808, GNorm = 1.3198, lr_0 = 4.8494e-04
Loss = 4.4394e-01, PNorm = 56.1901, GNorm = 1.4835, lr_0 = 4.8460e-04
Loss = 4.0565e-01, PNorm = 56.2019, GNorm = 1.8318, lr_0 = 4.8427e-04
Loss = 4.4228e-01, PNorm = 56.2100, GNorm = 1.1110, lr_0 = 4.8394e-04
Loss = 4.5939e-01, PNorm = 56.2140, GNorm = 1.4307, lr_0 = 4.8361e-04
Loss = 4.0812e-01, PNorm = 56.2212, GNorm = 1.1141, lr_0 = 4.8328e-04
Loss = 4.1504e-01, PNorm = 56.2298, GNorm = 1.3879, lr_0 = 4.8295e-04
Loss = 4.5427e-01, PNorm = 56.2386, GNorm = 2.3297, lr_0 = 4.8262e-04
Loss = 4.2182e-01, PNorm = 56.2457, GNorm = 1.3605, lr_0 = 4.8228e-04
Loss = 4.0124e-01, PNorm = 56.2533, GNorm = 1.0414, lr_0 = 4.8195e-04
Loss = 4.2357e-01, PNorm = 56.2605, GNorm = 1.9901, lr_0 = 4.8162e-04
Loss = 4.4249e-01, PNorm = 56.2687, GNorm = 2.0365, lr_0 = 4.8129e-04
Loss = 3.8280e-01, PNorm = 56.2829, GNorm = 1.2232, lr_0 = 4.8096e-04
Loss = 3.8038e-01, PNorm = 56.2856, GNorm = 1.4199, lr_0 = 4.8064e-04
Loss = 4.0568e-01, PNorm = 56.2859, GNorm = 1.5883, lr_0 = 4.8031e-04
Loss = 4.0665e-01, PNorm = 56.2953, GNorm = 1.8432, lr_0 = 4.7998e-04
Loss = 4.3749e-01, PNorm = 56.3059, GNorm = 1.6115, lr_0 = 4.7965e-04
Loss = 4.2493e-01, PNorm = 56.3200, GNorm = 1.3559, lr_0 = 4.7932e-04
Loss = 4.3360e-01, PNorm = 56.3297, GNorm = 2.4282, lr_0 = 4.7899e-04
Loss = 4.7396e-01, PNorm = 56.3456, GNorm = 1.4760, lr_0 = 4.7866e-04
Loss = 4.3936e-01, PNorm = 56.3556, GNorm = 1.4111, lr_0 = 4.7833e-04
Loss = 4.0177e-01, PNorm = 56.3640, GNorm = 1.0650, lr_0 = 4.7801e-04
Loss = 4.4019e-01, PNorm = 56.3688, GNorm = 1.1915, lr_0 = 4.7768e-04
Loss = 4.1782e-01, PNorm = 56.3802, GNorm = 1.4661, lr_0 = 4.7735e-04
Loss = 4.5843e-01, PNorm = 56.3917, GNorm = 1.3787, lr_0 = 4.7703e-04
Validation mae = 0.114459
Epoch 11
Loss = 3.6137e-01, PNorm = 56.3988, GNorm = 1.1262, lr_0 = 4.7670e-04
Loss = 4.3506e-01, PNorm = 56.4046, GNorm = 1.1778, lr_0 = 4.7637e-04
Loss = 3.3843e-01, PNorm = 56.4145, GNorm = 0.9801, lr_0 = 4.7605e-04
Loss = 4.2624e-01, PNorm = 56.4233, GNorm = 1.4288, lr_0 = 4.7572e-04
Loss = 3.7140e-01, PNorm = 56.4290, GNorm = 1.0123, lr_0 = 4.7539e-04
Loss = 4.3840e-01, PNorm = 56.4321, GNorm = 1.5387, lr_0 = 4.7507e-04
Loss = 3.8190e-01, PNorm = 56.4394, GNorm = 1.5076, lr_0 = 4.7474e-04
Loss = 4.0536e-01, PNorm = 56.4438, GNorm = 1.0243, lr_0 = 4.7442e-04
Loss = 3.4016e-01, PNorm = 56.4490, GNorm = 1.0608, lr_0 = 4.7409e-04
Loss = 3.6996e-01, PNorm = 56.4590, GNorm = 1.7664, lr_0 = 4.7377e-04
Loss = 4.0249e-01, PNorm = 56.4682, GNorm = 2.0402, lr_0 = 4.7344e-04
Loss = 3.9491e-01, PNorm = 56.4739, GNorm = 1.7231, lr_0 = 4.7312e-04
Loss = 4.3231e-01, PNorm = 56.4814, GNorm = 1.5394, lr_0 = 4.7279e-04
Loss = 3.9033e-01, PNorm = 56.4916, GNorm = 1.7578, lr_0 = 4.7247e-04
Loss = 3.7296e-01, PNorm = 56.4967, GNorm = 1.3299, lr_0 = 4.7215e-04
Loss = 3.8003e-01, PNorm = 56.5070, GNorm = 1.3782, lr_0 = 4.7182e-04
Loss = 3.6494e-01, PNorm = 56.5158, GNorm = 1.1452, lr_0 = 4.7150e-04
Loss = 3.8711e-01, PNorm = 56.5278, GNorm = 1.0581, lr_0 = 4.7118e-04
Loss = 3.7249e-01, PNorm = 56.5329, GNorm = 2.1576, lr_0 = 4.7085e-04
Loss = 3.8016e-01, PNorm = 56.5404, GNorm = 1.5093, lr_0 = 4.7053e-04
Loss = 4.0026e-01, PNorm = 56.5467, GNorm = 2.1036, lr_0 = 4.7021e-04
Loss = 3.7346e-01, PNorm = 56.5577, GNorm = 1.4685, lr_0 = 4.6989e-04
Loss = 4.4459e-01, PNorm = 56.5613, GNorm = 1.3000, lr_0 = 4.6957e-04
Loss = 4.2261e-01, PNorm = 56.5668, GNorm = 1.3848, lr_0 = 4.6924e-04
Loss = 4.2778e-01, PNorm = 56.5722, GNorm = 1.0879, lr_0 = 4.6892e-04
Loss = 3.6160e-01, PNorm = 56.5835, GNorm = 1.1475, lr_0 = 4.6860e-04
Loss = 3.8147e-01, PNorm = 56.5897, GNorm = 0.9951, lr_0 = 4.6828e-04
Loss = 4.0210e-01, PNorm = 56.5949, GNorm = 2.3516, lr_0 = 4.6796e-04
Loss = 3.6814e-01, PNorm = 56.6003, GNorm = 1.2219, lr_0 = 4.6764e-04
Loss = 4.5067e-01, PNorm = 56.6060, GNorm = 1.7251, lr_0 = 4.6732e-04
Loss = 4.1134e-01, PNorm = 56.6115, GNorm = 1.5490, lr_0 = 4.6700e-04
Loss = 4.5546e-01, PNorm = 56.6179, GNorm = 1.4125, lr_0 = 4.6668e-04
Loss = 4.4699e-01, PNorm = 56.6308, GNorm = 1.4675, lr_0 = 4.6636e-04
Loss = 3.8292e-01, PNorm = 56.6398, GNorm = 1.2509, lr_0 = 4.6604e-04
Loss = 3.9589e-01, PNorm = 56.6501, GNorm = 1.3925, lr_0 = 4.6572e-04
Loss = 3.6379e-01, PNorm = 56.6528, GNorm = 1.5662, lr_0 = 4.6540e-04
Loss = 4.3305e-01, PNorm = 56.6641, GNorm = 1.1184, lr_0 = 4.6508e-04
Loss = 4.0750e-01, PNorm = 56.6707, GNorm = 1.9634, lr_0 = 4.6476e-04
Loss = 4.2180e-01, PNorm = 56.6734, GNorm = 1.6197, lr_0 = 4.6445e-04
Loss = 4.0908e-01, PNorm = 56.6867, GNorm = 1.0824, lr_0 = 4.6413e-04
Loss = 4.0089e-01, PNorm = 56.6975, GNorm = 1.0668, lr_0 = 4.6381e-04
Loss = 3.8644e-01, PNorm = 56.7024, GNorm = 1.1473, lr_0 = 4.6349e-04
Loss = 4.1817e-01, PNorm = 56.7130, GNorm = 1.0924, lr_0 = 4.6317e-04
Loss = 3.3635e-01, PNorm = 56.7238, GNorm = 1.6770, lr_0 = 4.6286e-04
Loss = 4.3024e-01, PNorm = 56.7292, GNorm = 1.3223, lr_0 = 4.6254e-04
Loss = 3.8438e-01, PNorm = 56.7371, GNorm = 1.1141, lr_0 = 4.6222e-04
Loss = 4.2782e-01, PNorm = 56.7483, GNorm = 1.7155, lr_0 = 4.6191e-04
Loss = 3.6989e-01, PNorm = 56.7625, GNorm = 1.0712, lr_0 = 4.6159e-04
Loss = 3.9535e-01, PNorm = 56.7669, GNorm = 1.3412, lr_0 = 4.6127e-04
Loss = 3.8819e-01, PNorm = 56.7734, GNorm = 1.8033, lr_0 = 4.6096e-04
Loss = 3.9469e-01, PNorm = 56.7795, GNorm = 1.6864, lr_0 = 4.6064e-04
Loss = 4.3575e-01, PNorm = 56.7859, GNorm = 1.9113, lr_0 = 4.6033e-04
Loss = 4.0556e-01, PNorm = 56.7945, GNorm = 1.3520, lr_0 = 4.6001e-04
Loss = 4.3097e-01, PNorm = 56.7986, GNorm = 1.5450, lr_0 = 4.5970e-04
Loss = 3.7319e-01, PNorm = 56.8091, GNorm = 1.0134, lr_0 = 4.5938e-04
Loss = 4.1134e-01, PNorm = 56.8160, GNorm = 1.4973, lr_0 = 4.5907e-04
Loss = 4.2178e-01, PNorm = 56.8246, GNorm = 1.4553, lr_0 = 4.5875e-04
Loss = 3.9376e-01, PNorm = 56.8300, GNorm = 0.8116, lr_0 = 4.5844e-04
Loss = 4.1522e-01, PNorm = 56.8343, GNorm = 1.2312, lr_0 = 4.5812e-04
Loss = 3.9163e-01, PNorm = 56.8456, GNorm = 1.4179, lr_0 = 4.5781e-04
Loss = 3.9514e-01, PNorm = 56.8535, GNorm = 1.8994, lr_0 = 4.5750e-04
Loss = 3.8714e-01, PNorm = 56.8569, GNorm = 1.3195, lr_0 = 4.5718e-04
Loss = 4.1187e-01, PNorm = 56.8638, GNorm = 1.3722, lr_0 = 4.5687e-04
Loss = 3.8735e-01, PNorm = 56.8726, GNorm = 1.1090, lr_0 = 4.5656e-04
Loss = 4.3474e-01, PNorm = 56.8797, GNorm = 1.2851, lr_0 = 4.5624e-04
Loss = 4.6224e-01, PNorm = 56.8960, GNorm = 1.6071, lr_0 = 4.5593e-04
Loss = 4.3245e-01, PNorm = 56.8988, GNorm = 1.6408, lr_0 = 4.5562e-04
Loss = 4.3184e-01, PNorm = 56.9087, GNorm = 1.3317, lr_0 = 4.5531e-04
Loss = 4.1580e-01, PNorm = 56.9136, GNorm = 1.4277, lr_0 = 4.5499e-04
Loss = 4.2057e-01, PNorm = 56.9255, GNorm = 1.1871, lr_0 = 4.5468e-04
Loss = 3.8260e-01, PNorm = 56.9329, GNorm = 1.4398, lr_0 = 4.5437e-04
Loss = 4.0725e-01, PNorm = 56.9405, GNorm = 1.5004, lr_0 = 4.5406e-04
Loss = 4.2664e-01, PNorm = 56.9453, GNorm = 1.4583, lr_0 = 4.5375e-04
Loss = 3.9528e-01, PNorm = 56.9544, GNorm = 0.9229, lr_0 = 4.5344e-04
Loss = 4.0089e-01, PNorm = 56.9604, GNorm = 1.7070, lr_0 = 4.5313e-04
Loss = 4.5118e-01, PNorm = 56.9671, GNorm = 2.4370, lr_0 = 4.5282e-04
Loss = 3.8468e-01, PNorm = 56.9752, GNorm = 1.3803, lr_0 = 4.5251e-04
Loss = 3.9947e-01, PNorm = 56.9805, GNorm = 1.4903, lr_0 = 4.5220e-04
Loss = 3.9463e-01, PNorm = 56.9802, GNorm = 1.9957, lr_0 = 4.5189e-04
Loss = 4.5302e-01, PNorm = 56.9856, GNorm = 1.5854, lr_0 = 4.5158e-04
Loss = 3.8074e-01, PNorm = 56.9955, GNorm = 1.3182, lr_0 = 4.5127e-04
Loss = 5.1391e-01, PNorm = 57.0050, GNorm = 1.5131, lr_0 = 4.5096e-04
Loss = 3.7670e-01, PNorm = 57.0115, GNorm = 1.2434, lr_0 = 4.5065e-04
Loss = 3.9645e-01, PNorm = 57.0205, GNorm = 1.5073, lr_0 = 4.5034e-04
Loss = 4.1312e-01, PNorm = 57.0244, GNorm = 1.5645, lr_0 = 4.5003e-04
Loss = 4.0469e-01, PNorm = 57.0321, GNorm = 1.1664, lr_0 = 4.4972e-04
Loss = 4.0642e-01, PNorm = 57.0416, GNorm = 1.7955, lr_0 = 4.4942e-04
Loss = 4.4642e-01, PNorm = 57.0494, GNorm = 1.3251, lr_0 = 4.4911e-04
Loss = 4.8468e-01, PNorm = 57.0564, GNorm = 1.2543, lr_0 = 4.4880e-04
Loss = 3.9632e-01, PNorm = 57.0652, GNorm = 1.6236, lr_0 = 4.4849e-04
Loss = 3.9721e-01, PNorm = 57.0732, GNorm = 1.2010, lr_0 = 4.4819e-04
Loss = 4.0145e-01, PNorm = 57.0814, GNorm = 1.0859, lr_0 = 4.4788e-04
Loss = 4.0847e-01, PNorm = 57.0880, GNorm = 1.5023, lr_0 = 4.4757e-04
Loss = 4.0017e-01, PNorm = 57.0944, GNorm = 1.1895, lr_0 = 4.4727e-04
Loss = 4.3766e-01, PNorm = 57.0993, GNorm = 2.0579, lr_0 = 4.4696e-04
Loss = 4.0666e-01, PNorm = 57.1112, GNorm = 1.7129, lr_0 = 4.4665e-04
Loss = 4.0569e-01, PNorm = 57.1193, GNorm = 1.4030, lr_0 = 4.4635e-04
Loss = 3.8820e-01, PNorm = 57.1295, GNorm = 1.5619, lr_0 = 4.4604e-04
Loss = 4.2528e-01, PNorm = 57.1423, GNorm = 2.2771, lr_0 = 4.4574e-04
Loss = 3.5546e-01, PNorm = 57.1453, GNorm = 1.5952, lr_0 = 4.4543e-04
Loss = 4.1629e-01, PNorm = 57.1503, GNorm = 1.9341, lr_0 = 4.4513e-04
Loss = 4.4077e-01, PNorm = 57.1537, GNorm = 1.3789, lr_0 = 4.4482e-04
Loss = 3.7645e-01, PNorm = 57.1602, GNorm = 1.0716, lr_0 = 4.4452e-04
Loss = 4.0638e-01, PNorm = 57.1665, GNorm = 1.0222, lr_0 = 4.4421e-04
Loss = 4.4786e-01, PNorm = 57.1726, GNorm = 1.4218, lr_0 = 4.4391e-04
Loss = 4.2620e-01, PNorm = 57.1839, GNorm = 1.6586, lr_0 = 4.4360e-04
Loss = 4.1297e-01, PNorm = 57.1920, GNorm = 1.5735, lr_0 = 4.4330e-04
Loss = 4.5225e-01, PNorm = 57.1944, GNorm = 1.9664, lr_0 = 4.4299e-04
Loss = 4.1702e-01, PNorm = 57.2012, GNorm = 1.2003, lr_0 = 4.4269e-04
Loss = 4.0332e-01, PNorm = 57.2093, GNorm = 1.0094, lr_0 = 4.4239e-04
Loss = 4.0943e-01, PNorm = 57.2153, GNorm = 1.4810, lr_0 = 4.4209e-04
Loss = 4.4302e-01, PNorm = 57.2273, GNorm = 1.4216, lr_0 = 4.4178e-04
Loss = 4.3094e-01, PNorm = 57.2305, GNorm = 2.0689, lr_0 = 4.4148e-04
Loss = 4.3903e-01, PNorm = 57.2348, GNorm = 1.2409, lr_0 = 4.4118e-04
Loss = 5.0622e-01, PNorm = 57.2415, GNorm = 1.2164, lr_0 = 4.4088e-04
Loss = 5.0211e-01, PNorm = 57.2486, GNorm = 1.5204, lr_0 = 4.4057e-04
Loss = 4.1127e-01, PNorm = 57.2556, GNorm = 1.2816, lr_0 = 4.4027e-04
Loss = 3.8568e-01, PNorm = 57.2601, GNorm = 1.3830, lr_0 = 4.3997e-04
Loss = 3.9320e-01, PNorm = 57.2651, GNorm = 1.2250, lr_0 = 4.3967e-04
Loss = 4.0811e-01, PNorm = 57.2689, GNorm = 1.2562, lr_0 = 4.3937e-04
Validation mae = 0.114679
Epoch 12
Loss = 4.4003e-01, PNorm = 57.2743, GNorm = 1.7516, lr_0 = 4.3907e-04
Loss = 4.1422e-01, PNorm = 57.2848, GNorm = 1.2198, lr_0 = 4.3877e-04
Loss = 3.7378e-01, PNorm = 57.2903, GNorm = 1.0341, lr_0 = 4.3846e-04
Loss = 4.1876e-01, PNorm = 57.2983, GNorm = 1.3686, lr_0 = 4.3816e-04
Loss = 4.1165e-01, PNorm = 57.3102, GNorm = 1.5290, lr_0 = 4.3786e-04
Loss = 3.5457e-01, PNorm = 57.3129, GNorm = 0.9845, lr_0 = 4.3756e-04
Loss = 3.9997e-01, PNorm = 57.3204, GNorm = 1.7514, lr_0 = 4.3726e-04
Loss = 3.9854e-01, PNorm = 57.3273, GNorm = 1.2041, lr_0 = 4.3696e-04
Loss = 3.9806e-01, PNorm = 57.3323, GNorm = 1.0741, lr_0 = 4.3667e-04
Loss = 4.0412e-01, PNorm = 57.3402, GNorm = 1.1079, lr_0 = 4.3637e-04
Loss = 3.8754e-01, PNorm = 57.3503, GNorm = 1.5210, lr_0 = 4.3607e-04
Loss = 3.9946e-01, PNorm = 57.3574, GNorm = 1.1886, lr_0 = 4.3577e-04
Loss = 3.9818e-01, PNorm = 57.3620, GNorm = 1.1564, lr_0 = 4.3547e-04
Loss = 4.2808e-01, PNorm = 57.3656, GNorm = 1.3352, lr_0 = 4.3517e-04
Loss = 3.8195e-01, PNorm = 57.3705, GNorm = 1.3031, lr_0 = 4.3487e-04
Loss = 4.6574e-01, PNorm = 57.3788, GNorm = 1.1816, lr_0 = 4.3458e-04
Loss = 4.1466e-01, PNorm = 57.3907, GNorm = 0.9545, lr_0 = 4.3428e-04
Loss = 3.8741e-01, PNorm = 57.3979, GNorm = 1.0991, lr_0 = 4.3398e-04
Loss = 4.1610e-01, PNorm = 57.4047, GNorm = 1.0662, lr_0 = 4.3368e-04
Loss = 3.6587e-01, PNorm = 57.4143, GNorm = 1.1005, lr_0 = 4.3339e-04
Loss = 4.0681e-01, PNorm = 57.4225, GNorm = 1.3461, lr_0 = 4.3309e-04
Loss = 3.8572e-01, PNorm = 57.4253, GNorm = 1.2033, lr_0 = 4.3279e-04
Loss = 4.0344e-01, PNorm = 57.4313, GNorm = 1.9427, lr_0 = 4.3250e-04
Loss = 3.8745e-01, PNorm = 57.4391, GNorm = 1.2405, lr_0 = 4.3220e-04
Loss = 3.6889e-01, PNorm = 57.4491, GNorm = 1.1233, lr_0 = 4.3190e-04
Loss = 4.0889e-01, PNorm = 57.4539, GNorm = 1.5018, lr_0 = 4.3161e-04
Loss = 4.5781e-01, PNorm = 57.4551, GNorm = 0.9061, lr_0 = 4.3131e-04
Loss = 3.5807e-01, PNorm = 57.4616, GNorm = 1.1244, lr_0 = 4.3102e-04
Loss = 4.0895e-01, PNorm = 57.4728, GNorm = 1.4814, lr_0 = 4.3072e-04
Loss = 3.7409e-01, PNorm = 57.4762, GNorm = 1.2664, lr_0 = 4.3043e-04
Loss = 4.3533e-01, PNorm = 57.4893, GNorm = 2.4414, lr_0 = 4.3013e-04
Loss = 3.7016e-01, PNorm = 57.5020, GNorm = 1.1194, lr_0 = 4.2984e-04
Loss = 4.1330e-01, PNorm = 57.5078, GNorm = 1.2850, lr_0 = 4.2954e-04
Loss = 3.9569e-01, PNorm = 57.5148, GNorm = 1.2832, lr_0 = 4.2925e-04
Loss = 3.5666e-01, PNorm = 57.5194, GNorm = 1.3208, lr_0 = 4.2895e-04
Loss = 4.4099e-01, PNorm = 57.5260, GNorm = 1.0118, lr_0 = 4.2866e-04
Loss = 4.0920e-01, PNorm = 57.5355, GNorm = 1.2755, lr_0 = 4.2837e-04
Loss = 4.1146e-01, PNorm = 57.5433, GNorm = 1.7009, lr_0 = 4.2807e-04
Loss = 3.6837e-01, PNorm = 57.5515, GNorm = 1.2343, lr_0 = 4.2778e-04
Loss = 3.3012e-01, PNorm = 57.5547, GNorm = 1.3243, lr_0 = 4.2749e-04
Loss = 3.6255e-01, PNorm = 57.5618, GNorm = 1.0392, lr_0 = 4.2719e-04
Loss = 3.6537e-01, PNorm = 57.5738, GNorm = 1.3851, lr_0 = 4.2690e-04
Loss = 3.8751e-01, PNorm = 57.5828, GNorm = 1.2791, lr_0 = 4.2661e-04
Loss = 3.6460e-01, PNorm = 57.5836, GNorm = 1.3692, lr_0 = 4.2632e-04
Loss = 3.9924e-01, PNorm = 57.5939, GNorm = 1.2872, lr_0 = 4.2602e-04
Loss = 3.9862e-01, PNorm = 57.6032, GNorm = 1.0703, lr_0 = 4.2573e-04
Loss = 4.5264e-01, PNorm = 57.6160, GNorm = 1.1742, lr_0 = 4.2544e-04
Loss = 4.5578e-01, PNorm = 57.6179, GNorm = 1.1992, lr_0 = 4.2515e-04
Loss = 4.1507e-01, PNorm = 57.6258, GNorm = 1.6116, lr_0 = 4.2486e-04
Loss = 4.5302e-01, PNorm = 57.6321, GNorm = 1.2173, lr_0 = 4.2457e-04
Loss = 4.0710e-01, PNorm = 57.6389, GNorm = 1.2150, lr_0 = 4.2428e-04
Loss = 3.5533e-01, PNorm = 57.6456, GNorm = 1.7817, lr_0 = 4.2399e-04
Loss = 3.7647e-01, PNorm = 57.6499, GNorm = 1.2466, lr_0 = 4.2370e-04
Loss = 3.6812e-01, PNorm = 57.6565, GNorm = 0.9353, lr_0 = 4.2340e-04
Loss = 3.6539e-01, PNorm = 57.6670, GNorm = 1.2176, lr_0 = 4.2311e-04
Loss = 4.0874e-01, PNorm = 57.6770, GNorm = 1.7889, lr_0 = 4.2283e-04
Loss = 3.7757e-01, PNorm = 57.6871, GNorm = 1.1599, lr_0 = 4.2254e-04
Loss = 4.1469e-01, PNorm = 57.6908, GNorm = 1.5188, lr_0 = 4.2225e-04
Loss = 4.5396e-01, PNorm = 57.6941, GNorm = 1.4951, lr_0 = 4.2196e-04
Loss = 3.9619e-01, PNorm = 57.6996, GNorm = 1.3686, lr_0 = 4.2167e-04
Loss = 3.9499e-01, PNorm = 57.7087, GNorm = 1.8294, lr_0 = 4.2138e-04
Loss = 3.6055e-01, PNorm = 57.7165, GNorm = 1.5662, lr_0 = 4.2109e-04
Loss = 4.2399e-01, PNorm = 57.7236, GNorm = 1.5264, lr_0 = 4.2080e-04
Loss = 4.5619e-01, PNorm = 57.7260, GNorm = 1.4978, lr_0 = 4.2051e-04
Loss = 4.2204e-01, PNorm = 57.7335, GNorm = 1.0291, lr_0 = 4.2023e-04
Loss = 3.7307e-01, PNorm = 57.7327, GNorm = 1.1678, lr_0 = 4.1994e-04
Loss = 4.0084e-01, PNorm = 57.7342, GNorm = 1.2691, lr_0 = 4.1965e-04
Loss = 3.7557e-01, PNorm = 57.7418, GNorm = 1.0287, lr_0 = 4.1936e-04
Loss = 3.7269e-01, PNorm = 57.7435, GNorm = 1.6890, lr_0 = 4.1907e-04
Loss = 4.6446e-01, PNorm = 57.7511, GNorm = 1.4382, lr_0 = 4.1879e-04
Loss = 3.9133e-01, PNorm = 57.7604, GNorm = 1.1995, lr_0 = 4.1850e-04
Loss = 4.1536e-01, PNorm = 57.7652, GNorm = 1.0681, lr_0 = 4.1821e-04
Loss = 3.5130e-01, PNorm = 57.7711, GNorm = 1.0572, lr_0 = 4.1793e-04
Loss = 3.3801e-01, PNorm = 57.7770, GNorm = 1.4973, lr_0 = 4.1764e-04
Loss = 3.7354e-01, PNorm = 57.7814, GNorm = 1.4892, lr_0 = 4.1736e-04
Loss = 3.9215e-01, PNorm = 57.7865, GNorm = 1.6909, lr_0 = 4.1707e-04
Loss = 4.0415e-01, PNorm = 57.7921, GNorm = 1.1534, lr_0 = 4.1678e-04
Loss = 3.4164e-01, PNorm = 57.7979, GNorm = 1.7882, lr_0 = 4.1650e-04
Loss = 4.4111e-01, PNorm = 57.7989, GNorm = 1.3957, lr_0 = 4.1621e-04
Loss = 4.5093e-01, PNorm = 57.8082, GNorm = 2.4712, lr_0 = 4.1593e-04
Loss = 4.3454e-01, PNorm = 57.8113, GNorm = 1.4691, lr_0 = 4.1564e-04
Loss = 4.1833e-01, PNorm = 57.8218, GNorm = 1.5365, lr_0 = 4.1536e-04
Loss = 3.4294e-01, PNorm = 57.8303, GNorm = 1.3428, lr_0 = 4.1507e-04
Loss = 4.0222e-01, PNorm = 57.8354, GNorm = 1.7538, lr_0 = 4.1479e-04
Loss = 3.9226e-01, PNorm = 57.8412, GNorm = 1.6070, lr_0 = 4.1450e-04
Loss = 4.4026e-01, PNorm = 57.8478, GNorm = 1.1668, lr_0 = 4.1422e-04
Loss = 4.3213e-01, PNorm = 57.8504, GNorm = 2.0638, lr_0 = 4.1394e-04
Loss = 3.9620e-01, PNorm = 57.8552, GNorm = 2.8620, lr_0 = 4.1365e-04
Loss = 4.5108e-01, PNorm = 57.8671, GNorm = 1.9743, lr_0 = 4.1337e-04
Loss = 4.3053e-01, PNorm = 57.8755, GNorm = 1.5459, lr_0 = 4.1309e-04
Loss = 4.1494e-01, PNorm = 57.8839, GNorm = 1.9108, lr_0 = 4.1280e-04
Loss = 4.7251e-01, PNorm = 57.8940, GNorm = 1.5876, lr_0 = 4.1252e-04
Loss = 4.1214e-01, PNorm = 57.9075, GNorm = 1.0355, lr_0 = 4.1224e-04
Loss = 4.1676e-01, PNorm = 57.9130, GNorm = 1.4891, lr_0 = 4.1196e-04
Loss = 4.1756e-01, PNorm = 57.9163, GNorm = 1.2460, lr_0 = 4.1167e-04
Loss = 4.4913e-01, PNorm = 57.9191, GNorm = 1.5524, lr_0 = 4.1139e-04
Loss = 4.2622e-01, PNorm = 57.9273, GNorm = 1.6109, lr_0 = 4.1111e-04
Loss = 3.8387e-01, PNorm = 57.9342, GNorm = 1.0160, lr_0 = 4.1083e-04
Loss = 3.6956e-01, PNorm = 57.9397, GNorm = 1.2111, lr_0 = 4.1055e-04
Loss = 4.3703e-01, PNorm = 57.9479, GNorm = 1.2428, lr_0 = 4.1027e-04
Loss = 4.1828e-01, PNorm = 57.9528, GNorm = 0.9859, lr_0 = 4.0998e-04
Loss = 4.4233e-01, PNorm = 57.9590, GNorm = 1.4339, lr_0 = 4.0970e-04
Loss = 4.2788e-01, PNorm = 57.9623, GNorm = 1.1690, lr_0 = 4.0942e-04
Loss = 3.5299e-01, PNorm = 57.9688, GNorm = 1.4350, lr_0 = 4.0914e-04
Loss = 4.3756e-01, PNorm = 57.9749, GNorm = 0.9104, lr_0 = 4.0886e-04
Loss = 4.0585e-01, PNorm = 57.9828, GNorm = 1.0301, lr_0 = 4.0858e-04
Loss = 4.0736e-01, PNorm = 57.9897, GNorm = 1.1715, lr_0 = 4.0830e-04
Loss = 3.8250e-01, PNorm = 57.9936, GNorm = 1.2550, lr_0 = 4.0802e-04
Loss = 3.8704e-01, PNorm = 57.9980, GNorm = 1.1691, lr_0 = 4.0774e-04
Loss = 3.7945e-01, PNorm = 58.0028, GNorm = 2.0384, lr_0 = 4.0746e-04
Loss = 3.7595e-01, PNorm = 58.0069, GNorm = 1.3688, lr_0 = 4.0718e-04
Loss = 3.8448e-01, PNorm = 58.0174, GNorm = 1.5268, lr_0 = 4.0691e-04
Loss = 4.1160e-01, PNorm = 58.0244, GNorm = 1.4097, lr_0 = 4.0663e-04
Loss = 4.6292e-01, PNorm = 58.0340, GNorm = 1.3573, lr_0 = 4.0635e-04
Loss = 4.2626e-01, PNorm = 58.0375, GNorm = 1.4573, lr_0 = 4.0607e-04
Loss = 4.0067e-01, PNorm = 58.0382, GNorm = 1.6863, lr_0 = 4.0579e-04
Loss = 3.9279e-01, PNorm = 58.0482, GNorm = 1.5032, lr_0 = 4.0551e-04
Loss = 4.0792e-01, PNorm = 58.0536, GNorm = 1.1661, lr_0 = 4.0524e-04
Loss = 3.6345e-01, PNorm = 58.0582, GNorm = 1.1067, lr_0 = 4.0496e-04
Loss = 3.9524e-01, PNorm = 58.0670, GNorm = 1.3869, lr_0 = 4.0468e-04
Validation mae = 0.112811
Epoch 13
Loss = 3.8048e-01, PNorm = 58.0717, GNorm = 0.8302, lr_0 = 4.0440e-04
Loss = 3.0243e-01, PNorm = 58.0770, GNorm = 1.3579, lr_0 = 4.0413e-04
Loss = 3.7706e-01, PNorm = 58.0811, GNorm = 1.5168, lr_0 = 4.0385e-04
Loss = 3.9645e-01, PNorm = 58.0834, GNorm = 1.1912, lr_0 = 4.0357e-04
Loss = 3.9295e-01, PNorm = 58.0924, GNorm = 1.7704, lr_0 = 4.0330e-04
Loss = 3.9792e-01, PNorm = 58.1022, GNorm = 1.0920, lr_0 = 4.0302e-04
Loss = 4.2868e-01, PNorm = 58.1052, GNorm = 1.8289, lr_0 = 4.0274e-04
Loss = 3.6446e-01, PNorm = 58.1109, GNorm = 0.9781, lr_0 = 4.0247e-04
Loss = 3.8781e-01, PNorm = 58.1123, GNorm = 1.2526, lr_0 = 4.0219e-04
Loss = 3.6463e-01, PNorm = 58.1182, GNorm = 1.1901, lr_0 = 4.0192e-04
Loss = 3.7680e-01, PNorm = 58.1213, GNorm = 1.0969, lr_0 = 4.0164e-04
Loss = 3.9866e-01, PNorm = 58.1290, GNorm = 1.4342, lr_0 = 4.0137e-04
Loss = 4.1562e-01, PNorm = 58.1326, GNorm = 1.5250, lr_0 = 4.0109e-04
Loss = 3.9950e-01, PNorm = 58.1396, GNorm = 1.3605, lr_0 = 4.0082e-04
Loss = 3.4829e-01, PNorm = 58.1501, GNorm = 1.4892, lr_0 = 4.0054e-04
Loss = 4.2422e-01, PNorm = 58.1573, GNorm = 1.4279, lr_0 = 4.0027e-04
Loss = 4.1991e-01, PNorm = 58.1627, GNorm = 1.2557, lr_0 = 3.9999e-04
Loss = 3.6086e-01, PNorm = 58.1676, GNorm = 1.1515, lr_0 = 3.9972e-04
Loss = 4.3425e-01, PNorm = 58.1785, GNorm = 1.7790, lr_0 = 3.9945e-04
Loss = 4.0827e-01, PNorm = 58.1901, GNorm = 1.4732, lr_0 = 3.9917e-04
Loss = 3.6532e-01, PNorm = 58.2012, GNorm = 1.4719, lr_0 = 3.9890e-04
Loss = 3.9063e-01, PNorm = 58.2053, GNorm = 2.1040, lr_0 = 3.9863e-04
Loss = 3.9563e-01, PNorm = 58.2186, GNorm = 1.0518, lr_0 = 3.9835e-04
Loss = 3.6822e-01, PNorm = 58.2277, GNorm = 1.4705, lr_0 = 3.9808e-04
Loss = 3.6257e-01, PNorm = 58.2292, GNorm = 1.3603, lr_0 = 3.9781e-04
Loss = 3.8337e-01, PNorm = 58.2374, GNorm = 1.2905, lr_0 = 3.9753e-04
Loss = 3.9590e-01, PNorm = 58.2436, GNorm = 2.4356, lr_0 = 3.9726e-04
Loss = 3.8032e-01, PNorm = 58.2485, GNorm = 0.9681, lr_0 = 3.9699e-04
Loss = 4.2582e-01, PNorm = 58.2578, GNorm = 1.5558, lr_0 = 3.9672e-04
Loss = 4.0747e-01, PNorm = 58.2650, GNorm = 0.9789, lr_0 = 3.9645e-04
Loss = 3.2441e-01, PNorm = 58.2756, GNorm = 1.0873, lr_0 = 3.9617e-04
Loss = 3.9375e-01, PNorm = 58.2788, GNorm = 1.0725, lr_0 = 3.9590e-04
Loss = 3.9435e-01, PNorm = 58.2838, GNorm = 1.6322, lr_0 = 3.9563e-04
Loss = 3.9164e-01, PNorm = 58.2890, GNorm = 1.3534, lr_0 = 3.9536e-04
Loss = 4.3297e-01, PNorm = 58.2968, GNorm = 1.4867, lr_0 = 3.9509e-04
Loss = 3.8889e-01, PNorm = 58.3048, GNorm = 1.7788, lr_0 = 3.9482e-04
Loss = 3.8129e-01, PNorm = 58.3083, GNorm = 1.4150, lr_0 = 3.9455e-04
Loss = 3.3722e-01, PNorm = 58.3141, GNorm = 1.4234, lr_0 = 3.9428e-04
Loss = 3.9189e-01, PNorm = 58.3162, GNorm = 1.4995, lr_0 = 3.9401e-04
Loss = 4.1899e-01, PNorm = 58.3221, GNorm = 1.2573, lr_0 = 3.9374e-04
Loss = 4.5225e-01, PNorm = 58.3353, GNorm = 1.4420, lr_0 = 3.9347e-04
Loss = 4.3316e-01, PNorm = 58.3434, GNorm = 1.4774, lr_0 = 3.9320e-04
Loss = 4.0890e-01, PNorm = 58.3467, GNorm = 1.4453, lr_0 = 3.9293e-04
Loss = 4.4663e-01, PNorm = 58.3546, GNorm = 1.4199, lr_0 = 3.9266e-04
Loss = 4.2596e-01, PNorm = 58.3553, GNorm = 1.0561, lr_0 = 3.9239e-04
Loss = 4.2096e-01, PNorm = 58.3597, GNorm = 1.1184, lr_0 = 3.9212e-04
Loss = 3.7386e-01, PNorm = 58.3673, GNorm = 1.4035, lr_0 = 3.9185e-04
Loss = 3.8629e-01, PNorm = 58.3747, GNorm = 1.9322, lr_0 = 3.9159e-04
Loss = 3.8057e-01, PNorm = 58.3784, GNorm = 0.8427, lr_0 = 3.9132e-04
Loss = 4.4113e-01, PNorm = 58.3836, GNorm = 2.0406, lr_0 = 3.9105e-04
Loss = 3.7681e-01, PNorm = 58.3922, GNorm = 1.3902, lr_0 = 3.9078e-04
Loss = 4.4033e-01, PNorm = 58.4003, GNorm = 1.5754, lr_0 = 3.9051e-04
Loss = 3.8363e-01, PNorm = 58.4095, GNorm = 1.2965, lr_0 = 3.9025e-04
Loss = 3.9434e-01, PNorm = 58.4153, GNorm = 1.8258, lr_0 = 3.8998e-04
Loss = 3.8155e-01, PNorm = 58.4265, GNorm = 1.5520, lr_0 = 3.8971e-04
Loss = 3.9234e-01, PNorm = 58.4326, GNorm = 1.2933, lr_0 = 3.8945e-04
Loss = 4.2779e-01, PNorm = 58.4399, GNorm = 1.3316, lr_0 = 3.8918e-04
Loss = 3.9914e-01, PNorm = 58.4413, GNorm = 1.4482, lr_0 = 3.8891e-04
Loss = 3.7313e-01, PNorm = 58.4451, GNorm = 1.2073, lr_0 = 3.8865e-04
Loss = 3.9393e-01, PNorm = 58.4460, GNorm = 1.3759, lr_0 = 3.8838e-04
Loss = 4.1660e-01, PNorm = 58.4547, GNorm = 1.0997, lr_0 = 3.8811e-04
Loss = 3.9975e-01, PNorm = 58.4591, GNorm = 1.2101, lr_0 = 3.8785e-04
Loss = 4.2580e-01, PNorm = 58.4619, GNorm = 1.6449, lr_0 = 3.8758e-04
Loss = 4.3176e-01, PNorm = 58.4712, GNorm = 1.2468, lr_0 = 3.8732e-04
Loss = 3.6558e-01, PNorm = 58.4793, GNorm = 1.8660, lr_0 = 3.8705e-04
Loss = 4.1071e-01, PNorm = 58.4822, GNorm = 1.4617, lr_0 = 3.8679e-04
Loss = 3.7248e-01, PNorm = 58.4909, GNorm = 1.3121, lr_0 = 3.8652e-04
Loss = 3.5473e-01, PNorm = 58.4951, GNorm = 1.1922, lr_0 = 3.8626e-04
Loss = 4.2803e-01, PNorm = 58.5003, GNorm = 1.1252, lr_0 = 3.8599e-04
Loss = 4.2085e-01, PNorm = 58.5060, GNorm = 1.7123, lr_0 = 3.8573e-04
Loss = 3.7790e-01, PNorm = 58.5103, GNorm = 1.2394, lr_0 = 3.8546e-04
Loss = 3.7195e-01, PNorm = 58.5131, GNorm = 1.6625, lr_0 = 3.8520e-04
Loss = 4.0046e-01, PNorm = 58.5205, GNorm = 1.3641, lr_0 = 3.8493e-04
Loss = 4.1129e-01, PNorm = 58.5232, GNorm = 1.5080, lr_0 = 3.8467e-04
Loss = 4.0223e-01, PNorm = 58.5265, GNorm = 1.2359, lr_0 = 3.8441e-04
Loss = 3.6212e-01, PNorm = 58.5309, GNorm = 1.2810, lr_0 = 3.8414e-04
Loss = 4.1657e-01, PNorm = 58.5320, GNorm = 2.1628, lr_0 = 3.8388e-04
Loss = 3.5878e-01, PNorm = 58.5370, GNorm = 1.3620, lr_0 = 3.8362e-04
Loss = 3.6819e-01, PNorm = 58.5379, GNorm = 1.0696, lr_0 = 3.8336e-04
Loss = 4.2618e-01, PNorm = 58.5445, GNorm = 1.7936, lr_0 = 3.8309e-04
Loss = 3.8491e-01, PNorm = 58.5501, GNorm = 1.3719, lr_0 = 3.8283e-04
Loss = 4.0836e-01, PNorm = 58.5574, GNorm = 2.2505, lr_0 = 3.8257e-04
Loss = 3.6654e-01, PNorm = 58.5593, GNorm = 1.4371, lr_0 = 3.8231e-04
Loss = 3.6927e-01, PNorm = 58.5657, GNorm = 1.5117, lr_0 = 3.8204e-04
Loss = 4.1893e-01, PNorm = 58.5713, GNorm = 1.2785, lr_0 = 3.8178e-04
Loss = 4.2420e-01, PNorm = 58.5814, GNorm = 1.5825, lr_0 = 3.8152e-04
Loss = 3.4807e-01, PNorm = 58.5840, GNorm = 1.8457, lr_0 = 3.8126e-04
Loss = 3.6992e-01, PNorm = 58.5889, GNorm = 1.3479, lr_0 = 3.8100e-04
Loss = 3.8670e-01, PNorm = 58.5952, GNorm = 1.0381, lr_0 = 3.8074e-04
Loss = 4.1355e-01, PNorm = 58.6022, GNorm = 1.3623, lr_0 = 3.8048e-04
Loss = 3.9288e-01, PNorm = 58.6094, GNorm = 1.3346, lr_0 = 3.8022e-04
Loss = 3.5362e-01, PNorm = 58.6146, GNorm = 1.8870, lr_0 = 3.7995e-04
Loss = 3.8843e-01, PNorm = 58.6270, GNorm = 1.2114, lr_0 = 3.7969e-04
Loss = 3.6103e-01, PNorm = 58.6304, GNorm = 1.3180, lr_0 = 3.7943e-04
Loss = 3.8334e-01, PNorm = 58.6377, GNorm = 1.4886, lr_0 = 3.7917e-04
Loss = 4.9058e-01, PNorm = 58.6461, GNorm = 1.8444, lr_0 = 3.7891e-04
Loss = 4.5040e-01, PNorm = 58.6509, GNorm = 1.1338, lr_0 = 3.7866e-04
Loss = 4.3120e-01, PNorm = 58.6575, GNorm = 1.2842, lr_0 = 3.7840e-04
Loss = 4.4060e-01, PNorm = 58.6669, GNorm = 2.1139, lr_0 = 3.7814e-04
Loss = 3.4934e-01, PNorm = 58.6709, GNorm = 0.7860, lr_0 = 3.7788e-04
Loss = 4.3567e-01, PNorm = 58.6730, GNorm = 1.5217, lr_0 = 3.7762e-04
Loss = 3.6289e-01, PNorm = 58.6853, GNorm = 1.2743, lr_0 = 3.7736e-04
Loss = 3.5143e-01, PNorm = 58.6894, GNorm = 1.2828, lr_0 = 3.7710e-04
Loss = 4.1975e-01, PNorm = 58.6956, GNorm = 1.5032, lr_0 = 3.7684e-04
Loss = 4.2945e-01, PNorm = 58.6971, GNorm = 1.3386, lr_0 = 3.7659e-04
Loss = 4.0682e-01, PNorm = 58.7027, GNorm = 0.9098, lr_0 = 3.7633e-04
Loss = 4.0646e-01, PNorm = 58.7087, GNorm = 1.6928, lr_0 = 3.7607e-04
Loss = 4.0118e-01, PNorm = 58.7114, GNorm = 1.1810, lr_0 = 3.7581e-04
Loss = 4.1550e-01, PNorm = 58.7147, GNorm = 1.4303, lr_0 = 3.7555e-04
Loss = 4.2370e-01, PNorm = 58.7201, GNorm = 1.3797, lr_0 = 3.7530e-04
Loss = 3.8230e-01, PNorm = 58.7231, GNorm = 1.5590, lr_0 = 3.7504e-04
Loss = 3.6270e-01, PNorm = 58.7265, GNorm = 1.2047, lr_0 = 3.7478e-04
Loss = 4.4713e-01, PNorm = 58.7318, GNorm = 1.3625, lr_0 = 3.7453e-04
Loss = 3.6871e-01, PNorm = 58.7366, GNorm = 1.1701, lr_0 = 3.7427e-04
Loss = 3.6630e-01, PNorm = 58.7415, GNorm = 1.0552, lr_0 = 3.7401e-04
Loss = 4.2853e-01, PNorm = 58.7401, GNorm = 1.3803, lr_0 = 3.7376e-04
Loss = 3.8653e-01, PNorm = 58.7445, GNorm = 1.4566, lr_0 = 3.7350e-04
Loss = 4.4781e-01, PNorm = 58.7497, GNorm = 1.8574, lr_0 = 3.7325e-04
Loss = 3.7495e-01, PNorm = 58.7563, GNorm = 1.3243, lr_0 = 3.7299e-04
Loss = 3.7388e-01, PNorm = 58.7591, GNorm = 1.5119, lr_0 = 3.7273e-04
Validation mae = 0.112912
Epoch 14
Loss = 3.4392e-01, PNorm = 58.7599, GNorm = 1.2848, lr_0 = 3.7248e-04
Loss = 3.5192e-01, PNorm = 58.7610, GNorm = 1.0657, lr_0 = 3.7222e-04
Loss = 3.9046e-01, PNorm = 58.7659, GNorm = 1.3629, lr_0 = 3.7197e-04
Loss = 3.6537e-01, PNorm = 58.7739, GNorm = 0.9138, lr_0 = 3.7171e-04
Loss = 3.9213e-01, PNorm = 58.7812, GNorm = 1.3153, lr_0 = 3.7146e-04
Loss = 3.5887e-01, PNorm = 58.7892, GNorm = 2.0098, lr_0 = 3.7120e-04
Loss = 3.8146e-01, PNorm = 58.7961, GNorm = 1.9793, lr_0 = 3.7095e-04
Loss = 3.3619e-01, PNorm = 58.8039, GNorm = 1.0994, lr_0 = 3.7070e-04
Loss = 4.5151e-01, PNorm = 58.8080, GNorm = 1.2377, lr_0 = 3.7044e-04
Loss = 4.1512e-01, PNorm = 58.8164, GNorm = 1.6196, lr_0 = 3.7019e-04
Loss = 4.0137e-01, PNorm = 58.8183, GNorm = 1.2515, lr_0 = 3.6993e-04
Loss = 4.0577e-01, PNorm = 58.8241, GNorm = 1.2008, lr_0 = 3.6968e-04
Loss = 3.4408e-01, PNorm = 58.8333, GNorm = 1.2310, lr_0 = 3.6943e-04
Loss = 3.7827e-01, PNorm = 58.8379, GNorm = 1.4540, lr_0 = 3.6917e-04
Loss = 4.1104e-01, PNorm = 58.8393, GNorm = 2.5274, lr_0 = 3.6892e-04
Loss = 3.6215e-01, PNorm = 58.8422, GNorm = 1.3345, lr_0 = 3.6867e-04
Loss = 4.2046e-01, PNorm = 58.8513, GNorm = 1.3012, lr_0 = 3.6842e-04
Loss = 4.0354e-01, PNorm = 58.8590, GNorm = 1.6987, lr_0 = 3.6816e-04
Loss = 4.2574e-01, PNorm = 58.8663, GNorm = 1.7275, lr_0 = 3.6791e-04
Loss = 3.8363e-01, PNorm = 58.8693, GNorm = 1.2440, lr_0 = 3.6766e-04
Loss = 4.1619e-01, PNorm = 58.8769, GNorm = 1.8937, lr_0 = 3.6741e-04
Loss = 3.5207e-01, PNorm = 58.8820, GNorm = 1.6494, lr_0 = 3.6716e-04
Loss = 3.6812e-01, PNorm = 58.8869, GNorm = 1.6486, lr_0 = 3.6690e-04
Loss = 3.9624e-01, PNorm = 58.8927, GNorm = 1.3232, lr_0 = 3.6665e-04
Loss = 3.6721e-01, PNorm = 58.8942, GNorm = 1.1993, lr_0 = 3.6640e-04
Loss = 4.1224e-01, PNorm = 58.9024, GNorm = 1.6188, lr_0 = 3.6615e-04
Loss = 3.5124e-01, PNorm = 58.9079, GNorm = 1.0885, lr_0 = 3.6590e-04
Loss = 4.5502e-01, PNorm = 58.9117, GNorm = 1.7616, lr_0 = 3.6565e-04
Loss = 4.1941e-01, PNorm = 58.9154, GNorm = 1.3507, lr_0 = 3.6540e-04
Loss = 4.2991e-01, PNorm = 58.9224, GNorm = 1.9002, lr_0 = 3.6515e-04
Loss = 3.7756e-01, PNorm = 58.9226, GNorm = 1.6721, lr_0 = 3.6490e-04
Loss = 3.9560e-01, PNorm = 58.9281, GNorm = 1.1600, lr_0 = 3.6465e-04
Loss = 3.6627e-01, PNorm = 58.9340, GNorm = 1.4530, lr_0 = 3.6440e-04
Loss = 4.0821e-01, PNorm = 58.9393, GNorm = 1.2063, lr_0 = 3.6415e-04
Loss = 4.0028e-01, PNorm = 58.9454, GNorm = 1.1037, lr_0 = 3.6390e-04
Loss = 3.8856e-01, PNorm = 58.9511, GNorm = 1.6039, lr_0 = 3.6365e-04
Loss = 3.6199e-01, PNorm = 58.9529, GNorm = 1.2053, lr_0 = 3.6340e-04
Loss = 3.9993e-01, PNorm = 58.9554, GNorm = 1.3481, lr_0 = 3.6315e-04
Loss = 4.2451e-01, PNorm = 58.9571, GNorm = 1.2702, lr_0 = 3.6290e-04
Loss = 4.1384e-01, PNorm = 58.9657, GNorm = 1.1285, lr_0 = 3.6266e-04
Loss = 4.0590e-01, PNorm = 58.9726, GNorm = 1.5633, lr_0 = 3.6241e-04
Loss = 4.0199e-01, PNorm = 58.9842, GNorm = 1.3890, lr_0 = 3.6216e-04
Loss = 3.9519e-01, PNorm = 58.9870, GNorm = 1.4830, lr_0 = 3.6191e-04
Loss = 3.8853e-01, PNorm = 58.9935, GNorm = 1.2175, lr_0 = 3.6166e-04
Loss = 4.3467e-01, PNorm = 59.0004, GNorm = 1.4754, lr_0 = 3.6141e-04
Loss = 3.5505e-01, PNorm = 59.0059, GNorm = 2.0059, lr_0 = 3.6117e-04
Loss = 4.0413e-01, PNorm = 59.0174, GNorm = 1.6302, lr_0 = 3.6092e-04
Loss = 3.4701e-01, PNorm = 59.0256, GNorm = 1.3505, lr_0 = 3.6067e-04
Loss = 3.7533e-01, PNorm = 59.0296, GNorm = 1.2804, lr_0 = 3.6043e-04
Loss = 3.9602e-01, PNorm = 59.0369, GNorm = 1.0405, lr_0 = 3.6018e-04
Loss = 3.9247e-01, PNorm = 59.0405, GNorm = 1.1663, lr_0 = 3.5993e-04
Loss = 3.9067e-01, PNorm = 59.0426, GNorm = 1.0985, lr_0 = 3.5969e-04
Loss = 4.0530e-01, PNorm = 59.0452, GNorm = 2.0562, lr_0 = 3.5944e-04
Loss = 3.5760e-01, PNorm = 59.0534, GNorm = 1.4917, lr_0 = 3.5919e-04
Loss = 3.5456e-01, PNorm = 59.0585, GNorm = 1.2096, lr_0 = 3.5895e-04
Loss = 4.2245e-01, PNorm = 59.0678, GNorm = 1.1039, lr_0 = 3.5870e-04
Loss = 3.0190e-01, PNorm = 59.0697, GNorm = 1.2501, lr_0 = 3.5845e-04
Loss = 3.6004e-01, PNorm = 59.0712, GNorm = 0.8970, lr_0 = 3.5821e-04
Loss = 3.8800e-01, PNorm = 59.0732, GNorm = 1.4240, lr_0 = 3.5796e-04
Loss = 3.8905e-01, PNorm = 59.0776, GNorm = 1.3719, lr_0 = 3.5772e-04
Loss = 3.7775e-01, PNorm = 59.0821, GNorm = 1.7481, lr_0 = 3.5747e-04
Loss = 3.4092e-01, PNorm = 59.0887, GNorm = 1.0001, lr_0 = 3.5723e-04
Loss = 4.4394e-01, PNorm = 59.0917, GNorm = 1.2755, lr_0 = 3.5698e-04
Loss = 3.8394e-01, PNorm = 59.1016, GNorm = 1.7559, lr_0 = 3.5674e-04
Loss = 3.9745e-01, PNorm = 59.1109, GNorm = 1.2699, lr_0 = 3.5650e-04
Loss = 3.9132e-01, PNorm = 59.1149, GNorm = 1.3039, lr_0 = 3.5625e-04
Loss = 4.2175e-01, PNorm = 59.1247, GNorm = 1.2006, lr_0 = 3.5601e-04
Loss = 4.3869e-01, PNorm = 59.1245, GNorm = 1.7026, lr_0 = 3.5576e-04
Loss = 3.6862e-01, PNorm = 59.1285, GNorm = 1.2719, lr_0 = 3.5552e-04
Loss = 4.6217e-01, PNorm = 59.1352, GNorm = 1.0688, lr_0 = 3.5528e-04
Loss = 3.9074e-01, PNorm = 59.1457, GNorm = 1.3588, lr_0 = 3.5503e-04
Loss = 4.1741e-01, PNorm = 59.1504, GNorm = 1.7280, lr_0 = 3.5479e-04
Loss = 3.6526e-01, PNorm = 59.1503, GNorm = 2.6088, lr_0 = 3.5455e-04
Loss = 4.6054e-01, PNorm = 59.1603, GNorm = 1.7915, lr_0 = 3.5430e-04
Loss = 4.0785e-01, PNorm = 59.1671, GNorm = 1.4565, lr_0 = 3.5406e-04
Loss = 3.8050e-01, PNorm = 59.1752, GNorm = 0.9585, lr_0 = 3.5382e-04
Loss = 3.8481e-01, PNorm = 59.1828, GNorm = 1.2307, lr_0 = 3.5358e-04
Loss = 4.0276e-01, PNorm = 59.1886, GNorm = 1.4384, lr_0 = 3.5333e-04
Loss = 4.0036e-01, PNorm = 59.1945, GNorm = 1.3686, lr_0 = 3.5309e-04
Loss = 3.8461e-01, PNorm = 59.1959, GNorm = 0.9294, lr_0 = 3.5285e-04
Loss = 3.8737e-01, PNorm = 59.2035, GNorm = 1.2016, lr_0 = 3.5261e-04
Loss = 3.9099e-01, PNorm = 59.2057, GNorm = 0.8982, lr_0 = 3.5237e-04
Loss = 4.6361e-01, PNorm = 59.2117, GNorm = 1.4731, lr_0 = 3.5212e-04
Loss = 3.8407e-01, PNorm = 59.2175, GNorm = 1.3403, lr_0 = 3.5188e-04
Loss = 3.7811e-01, PNorm = 59.2232, GNorm = 1.3931, lr_0 = 3.5164e-04
Loss = 4.1467e-01, PNorm = 59.2286, GNorm = 1.2200, lr_0 = 3.5140e-04
Loss = 3.8139e-01, PNorm = 59.2323, GNorm = 1.3682, lr_0 = 3.5116e-04
Loss = 4.2407e-01, PNorm = 59.2323, GNorm = 1.3816, lr_0 = 3.5092e-04
Loss = 4.2154e-01, PNorm = 59.2352, GNorm = 1.4786, lr_0 = 3.5068e-04
Loss = 3.7020e-01, PNorm = 59.2423, GNorm = 1.7213, lr_0 = 3.5044e-04
Loss = 3.8567e-01, PNorm = 59.2493, GNorm = 0.9464, lr_0 = 3.5020e-04
Loss = 4.0521e-01, PNorm = 59.2573, GNorm = 2.0013, lr_0 = 3.4996e-04
Loss = 3.8255e-01, PNorm = 59.2574, GNorm = 1.6969, lr_0 = 3.4972e-04
Loss = 4.3027e-01, PNorm = 59.2667, GNorm = 1.4541, lr_0 = 3.4948e-04
Loss = 3.9630e-01, PNorm = 59.2698, GNorm = 1.2006, lr_0 = 3.4924e-04
Loss = 4.0591e-01, PNorm = 59.2744, GNorm = 1.1448, lr_0 = 3.4900e-04
Loss = 3.8574e-01, PNorm = 59.2759, GNorm = 3.3168, lr_0 = 3.4876e-04
Loss = 3.9989e-01, PNorm = 59.2751, GNorm = 1.0452, lr_0 = 3.4852e-04
Loss = 4.0838e-01, PNorm = 59.2797, GNorm = 1.4713, lr_0 = 3.4828e-04
Loss = 4.3120e-01, PNorm = 59.2818, GNorm = 1.5891, lr_0 = 3.4805e-04
Loss = 3.8546e-01, PNorm = 59.2894, GNorm = 1.7879, lr_0 = 3.4781e-04
Loss = 3.5304e-01, PNorm = 59.2946, GNorm = 1.6904, lr_0 = 3.4757e-04
Loss = 3.9974e-01, PNorm = 59.3039, GNorm = 1.3179, lr_0 = 3.4733e-04
Loss = 3.8028e-01, PNorm = 59.3123, GNorm = 1.7945, lr_0 = 3.4709e-04
Loss = 4.8032e-01, PNorm = 59.3173, GNorm = 1.2951, lr_0 = 3.4686e-04
Loss = 3.6601e-01, PNorm = 59.3255, GNorm = 1.2357, lr_0 = 3.4662e-04
Loss = 3.0948e-01, PNorm = 59.3291, GNorm = 1.3144, lr_0 = 3.4638e-04
Loss = 3.6966e-01, PNorm = 59.3348, GNorm = 1.8015, lr_0 = 3.4614e-04
Loss = 3.3337e-01, PNorm = 59.3366, GNorm = 0.9362, lr_0 = 3.4591e-04
Loss = 3.6660e-01, PNorm = 59.3404, GNorm = 1.2880, lr_0 = 3.4567e-04
Loss = 3.6039e-01, PNorm = 59.3442, GNorm = 1.2816, lr_0 = 3.4543e-04
Loss = 3.9799e-01, PNorm = 59.3485, GNorm = 1.1310, lr_0 = 3.4520e-04
Loss = 3.9639e-01, PNorm = 59.3502, GNorm = 1.0750, lr_0 = 3.4496e-04
Loss = 3.9669e-01, PNorm = 59.3549, GNorm = 1.4645, lr_0 = 3.4472e-04
Loss = 3.8804e-01, PNorm = 59.3560, GNorm = 1.2387, lr_0 = 3.4449e-04
Loss = 3.3807e-01, PNorm = 59.3599, GNorm = 1.0860, lr_0 = 3.4425e-04
Loss = 4.0633e-01, PNorm = 59.3638, GNorm = 1.4740, lr_0 = 3.4402e-04
Loss = 3.7798e-01, PNorm = 59.3713, GNorm = 1.1604, lr_0 = 3.4378e-04
Loss = 3.8033e-01, PNorm = 59.3774, GNorm = 2.1057, lr_0 = 3.4354e-04
Loss = 3.7282e-01, PNorm = 59.3799, GNorm = 1.4970, lr_0 = 3.4331e-04
Validation mae = 0.112409
Epoch 15
Loss = 3.5026e-01, PNorm = 59.3820, GNorm = 1.8326, lr_0 = 3.4307e-04
Loss = 4.0727e-01, PNorm = 59.3845, GNorm = 1.4876, lr_0 = 3.4284e-04
Loss = 3.8728e-01, PNorm = 59.3890, GNorm = 1.3047, lr_0 = 3.4260e-04
Loss = 3.4883e-01, PNorm = 59.3904, GNorm = 1.2707, lr_0 = 3.4237e-04
Loss = 4.1035e-01, PNorm = 59.3931, GNorm = 1.3822, lr_0 = 3.4213e-04
Loss = 4.0010e-01, PNorm = 59.3973, GNorm = 1.2072, lr_0 = 3.4190e-04
Loss = 4.3197e-01, PNorm = 59.4059, GNorm = 1.8540, lr_0 = 3.4167e-04
Loss = 3.8181e-01, PNorm = 59.4099, GNorm = 1.6251, lr_0 = 3.4143e-04
Loss = 3.8492e-01, PNorm = 59.4123, GNorm = 1.4542, lr_0 = 3.4120e-04
Loss = 3.4938e-01, PNorm = 59.4211, GNorm = 1.0988, lr_0 = 3.4096e-04
Loss = 3.8817e-01, PNorm = 59.4240, GNorm = 1.1957, lr_0 = 3.4073e-04
Loss = 3.9207e-01, PNorm = 59.4309, GNorm = 2.3810, lr_0 = 3.4050e-04
Loss = 3.4964e-01, PNorm = 59.4363, GNorm = 1.3658, lr_0 = 3.4026e-04
Loss = 4.2873e-01, PNorm = 59.4439, GNorm = 0.9918, lr_0 = 3.4003e-04
Loss = 3.5857e-01, PNorm = 59.4504, GNorm = 1.6017, lr_0 = 3.3980e-04
Loss = 3.8019e-01, PNorm = 59.4532, GNorm = 2.0903, lr_0 = 3.3956e-04
Loss = 3.8149e-01, PNorm = 59.4560, GNorm = 1.4276, lr_0 = 3.3933e-04
Loss = 4.1862e-01, PNorm = 59.4594, GNorm = 1.4379, lr_0 = 3.3910e-04
Loss = 4.0485e-01, PNorm = 59.4653, GNorm = 1.9116, lr_0 = 3.3887e-04
Loss = 3.3200e-01, PNorm = 59.4691, GNorm = 1.5757, lr_0 = 3.3864e-04
Loss = 4.2842e-01, PNorm = 59.4739, GNorm = 1.1947, lr_0 = 3.3840e-04
Loss = 3.3885e-01, PNorm = 59.4739, GNorm = 1.4372, lr_0 = 3.3817e-04
Loss = 3.8954e-01, PNorm = 59.4759, GNorm = 1.6648, lr_0 = 3.3794e-04
Loss = 4.0647e-01, PNorm = 59.4800, GNorm = 1.2258, lr_0 = 3.3771e-04
Loss = 3.8842e-01, PNorm = 59.4893, GNorm = 1.5752, lr_0 = 3.3748e-04
Loss = 4.2693e-01, PNorm = 59.4951, GNorm = 1.2105, lr_0 = 3.3725e-04
Loss = 4.0251e-01, PNorm = 59.5020, GNorm = 1.1018, lr_0 = 3.3701e-04
Loss = 3.5588e-01, PNorm = 59.5057, GNorm = 2.0202, lr_0 = 3.3678e-04
Loss = 3.1070e-01, PNorm = 59.5119, GNorm = 1.1916, lr_0 = 3.3655e-04
Loss = 3.9692e-01, PNorm = 59.5124, GNorm = 1.1480, lr_0 = 3.3632e-04
Loss = 4.1326e-01, PNorm = 59.5162, GNorm = 2.4320, lr_0 = 3.3609e-04
Loss = 4.0526e-01, PNorm = 59.5252, GNorm = 1.3688, lr_0 = 3.3586e-04
Loss = 3.4604e-01, PNorm = 59.5277, GNorm = 1.0255, lr_0 = 3.3563e-04
Loss = 3.5554e-01, PNorm = 59.5305, GNorm = 1.4951, lr_0 = 3.3540e-04
Loss = 3.7728e-01, PNorm = 59.5370, GNorm = 1.2219, lr_0 = 3.3517e-04
Loss = 3.4207e-01, PNorm = 59.5448, GNorm = 1.6268, lr_0 = 3.3494e-04
Loss = 4.0890e-01, PNorm = 59.5485, GNorm = 1.3493, lr_0 = 3.3471e-04
Loss = 3.8765e-01, PNorm = 59.5517, GNorm = 1.5278, lr_0 = 3.3448e-04
Loss = 3.5078e-01, PNorm = 59.5600, GNorm = 1.7259, lr_0 = 3.3425e-04
Loss = 3.8637e-01, PNorm = 59.5596, GNorm = 1.7625, lr_0 = 3.3403e-04
Loss = 4.1488e-01, PNorm = 59.5675, GNorm = 1.2709, lr_0 = 3.3380e-04
Loss = 4.0427e-01, PNorm = 59.5746, GNorm = 2.0803, lr_0 = 3.3357e-04
Loss = 4.0984e-01, PNorm = 59.5772, GNorm = 1.2792, lr_0 = 3.3334e-04
Loss = 3.8800e-01, PNorm = 59.5807, GNorm = 1.1906, lr_0 = 3.3311e-04
Loss = 4.0066e-01, PNorm = 59.5866, GNorm = 1.5098, lr_0 = 3.3288e-04
Loss = 4.3279e-01, PNorm = 59.5958, GNorm = 1.8644, lr_0 = 3.3265e-04
Loss = 4.1202e-01, PNorm = 59.5992, GNorm = 1.5237, lr_0 = 3.3243e-04
Loss = 4.7962e-01, PNorm = 59.6010, GNorm = 1.8238, lr_0 = 3.3220e-04
Loss = 3.5234e-01, PNorm = 59.6059, GNorm = 1.1806, lr_0 = 3.3197e-04
Loss = 4.1621e-01, PNorm = 59.6078, GNorm = 1.4058, lr_0 = 3.3174e-04
Loss = 3.9431e-01, PNorm = 59.6106, GNorm = 1.6829, lr_0 = 3.3152e-04
Loss = 4.0755e-01, PNorm = 59.6154, GNorm = 1.1455, lr_0 = 3.3129e-04
Loss = 4.3439e-01, PNorm = 59.6163, GNorm = 1.5975, lr_0 = 3.3106e-04
Loss = 4.0216e-01, PNorm = 59.6215, GNorm = 1.1861, lr_0 = 3.3084e-04
Loss = 3.4795e-01, PNorm = 59.6266, GNorm = 1.3039, lr_0 = 3.3061e-04
Loss = 3.3556e-01, PNorm = 59.6327, GNorm = 0.9442, lr_0 = 3.3038e-04
Loss = 4.0004e-01, PNorm = 59.6383, GNorm = 1.6938, lr_0 = 3.3016e-04
Loss = 3.8807e-01, PNorm = 59.6420, GNorm = 1.7092, lr_0 = 3.2993e-04
Loss = 4.4656e-01, PNorm = 59.6496, GNorm = 1.6223, lr_0 = 3.2970e-04
Loss = 3.5918e-01, PNorm = 59.6568, GNorm = 1.3665, lr_0 = 3.2948e-04
Loss = 3.7736e-01, PNorm = 59.6622, GNorm = 1.1000, lr_0 = 3.2925e-04
Loss = 3.3462e-01, PNorm = 59.6686, GNorm = 1.3163, lr_0 = 3.2903e-04
Loss = 3.3103e-01, PNorm = 59.6678, GNorm = 0.9116, lr_0 = 3.2880e-04
Loss = 3.5908e-01, PNorm = 59.6749, GNorm = 1.2378, lr_0 = 3.2858e-04
Loss = 3.8699e-01, PNorm = 59.6768, GNorm = 1.1707, lr_0 = 3.2835e-04
Loss = 3.6940e-01, PNorm = 59.6865, GNorm = 1.1799, lr_0 = 3.2813e-04
Loss = 4.0290e-01, PNorm = 59.6899, GNorm = 1.7260, lr_0 = 3.2790e-04
Loss = 3.9005e-01, PNorm = 59.6945, GNorm = 1.3514, lr_0 = 3.2768e-04
Loss = 3.4339e-01, PNorm = 59.6983, GNorm = 1.2936, lr_0 = 3.2745e-04
Loss = 3.9647e-01, PNorm = 59.7001, GNorm = 1.4544, lr_0 = 3.2723e-04
Loss = 4.2618e-01, PNorm = 59.7051, GNorm = 1.5332, lr_0 = 3.2700e-04
Loss = 3.4557e-01, PNorm = 59.7075, GNorm = 1.5418, lr_0 = 3.2678e-04
Loss = 3.8385e-01, PNorm = 59.7101, GNorm = 1.1620, lr_0 = 3.2656e-04
Loss = 4.0389e-01, PNorm = 59.7158, GNorm = 1.6766, lr_0 = 3.2633e-04
Loss = 3.6594e-01, PNorm = 59.7198, GNorm = 1.3199, lr_0 = 3.2611e-04
Loss = 4.1288e-01, PNorm = 59.7245, GNorm = 1.4249, lr_0 = 3.2589e-04
Loss = 3.7628e-01, PNorm = 59.7271, GNorm = 1.4843, lr_0 = 3.2566e-04
Loss = 4.0617e-01, PNorm = 59.7322, GNorm = 1.3316, lr_0 = 3.2544e-04
Loss = 3.5637e-01, PNorm = 59.7436, GNorm = 1.2662, lr_0 = 3.2522e-04
Loss = 3.8819e-01, PNorm = 59.7430, GNorm = 1.1519, lr_0 = 3.2499e-04
Loss = 3.7867e-01, PNorm = 59.7458, GNorm = 1.3742, lr_0 = 3.2477e-04
Loss = 3.9587e-01, PNorm = 59.7487, GNorm = 1.1656, lr_0 = 3.2455e-04
Loss = 3.6805e-01, PNorm = 59.7513, GNorm = 0.9581, lr_0 = 3.2433e-04
Loss = 4.0122e-01, PNorm = 59.7539, GNorm = 1.8078, lr_0 = 3.2410e-04
Loss = 3.6031e-01, PNorm = 59.7601, GNorm = 1.1830, lr_0 = 3.2388e-04
Loss = 3.3237e-01, PNorm = 59.7629, GNorm = 1.5610, lr_0 = 3.2366e-04
Loss = 4.0684e-01, PNorm = 59.7694, GNorm = 1.2456, lr_0 = 3.2344e-04
Loss = 3.4990e-01, PNorm = 59.7720, GNorm = 1.0997, lr_0 = 3.2322e-04
Loss = 3.7563e-01, PNorm = 59.7726, GNorm = 1.4178, lr_0 = 3.2300e-04
Loss = 4.1160e-01, PNorm = 59.7775, GNorm = 1.4421, lr_0 = 3.2277e-04
Loss = 3.9194e-01, PNorm = 59.7841, GNorm = 1.4610, lr_0 = 3.2255e-04
Loss = 3.2494e-01, PNorm = 59.7906, GNorm = 1.6136, lr_0 = 3.2233e-04
Loss = 3.9303e-01, PNorm = 59.7974, GNorm = 1.2810, lr_0 = 3.2211e-04
Loss = 4.2196e-01, PNorm = 59.8047, GNorm = 1.5533, lr_0 = 3.2189e-04
Loss = 3.8766e-01, PNorm = 59.8137, GNorm = 2.0302, lr_0 = 3.2167e-04
Loss = 3.5870e-01, PNorm = 59.8163, GNorm = 1.5142, lr_0 = 3.2145e-04
Loss = 3.5469e-01, PNorm = 59.8201, GNorm = 1.1785, lr_0 = 3.2123e-04
Loss = 3.7161e-01, PNorm = 59.8252, GNorm = 1.1955, lr_0 = 3.2101e-04
Loss = 3.8059e-01, PNorm = 59.8313, GNorm = 1.4313, lr_0 = 3.2079e-04
Loss = 3.9411e-01, PNorm = 59.8367, GNorm = 1.6068, lr_0 = 3.2057e-04
Loss = 4.2305e-01, PNorm = 59.8433, GNorm = 1.3801, lr_0 = 3.2035e-04
Loss = 3.3807e-01, PNorm = 59.8483, GNorm = 2.1880, lr_0 = 3.2013e-04
Loss = 3.4909e-01, PNorm = 59.8563, GNorm = 1.8016, lr_0 = 3.1991e-04
Loss = 3.7377e-01, PNorm = 59.8560, GNorm = 1.3691, lr_0 = 3.1969e-04
Loss = 4.0838e-01, PNorm = 59.8588, GNorm = 1.2305, lr_0 = 3.1947e-04
Loss = 4.4947e-01, PNorm = 59.8642, GNorm = 1.3104, lr_0 = 3.1925e-04
Loss = 3.9017e-01, PNorm = 59.8650, GNorm = 1.0966, lr_0 = 3.1904e-04
Loss = 3.9446e-01, PNorm = 59.8650, GNorm = 2.1816, lr_0 = 3.1882e-04
Loss = 3.4520e-01, PNorm = 59.8699, GNorm = 1.3760, lr_0 = 3.1860e-04
Loss = 3.5392e-01, PNorm = 59.8700, GNorm = 1.1842, lr_0 = 3.1838e-04
Loss = 3.6782e-01, PNorm = 59.8746, GNorm = 1.4140, lr_0 = 3.1816e-04
Loss = 4.1317e-01, PNorm = 59.8752, GNorm = 1.7604, lr_0 = 3.1794e-04
Loss = 3.8776e-01, PNorm = 59.8782, GNorm = 1.6241, lr_0 = 3.1773e-04
Loss = 3.9102e-01, PNorm = 59.8814, GNorm = 1.3398, lr_0 = 3.1751e-04
Loss = 3.1742e-01, PNorm = 59.8887, GNorm = 1.6095, lr_0 = 3.1729e-04
Loss = 4.0749e-01, PNorm = 59.8936, GNorm = 1.5726, lr_0 = 3.1707e-04
Loss = 3.7608e-01, PNorm = 59.9007, GNorm = 1.3645, lr_0 = 3.1686e-04
Loss = 3.9502e-01, PNorm = 59.9083, GNorm = 1.2849, lr_0 = 3.1664e-04
Loss = 4.2085e-01, PNorm = 59.9132, GNorm = 1.1887, lr_0 = 3.1642e-04
Loss = 3.5714e-01, PNorm = 59.9228, GNorm = 1.7730, lr_0 = 3.1621e-04
Validation mae = 0.114230
Epoch 16
Loss = 3.3602e-01, PNorm = 59.9279, GNorm = 1.0720, lr_0 = 3.1599e-04
Loss = 3.4585e-01, PNorm = 59.9338, GNorm = 1.2949, lr_0 = 3.1577e-04
Loss = 4.0671e-01, PNorm = 59.9408, GNorm = 1.1987, lr_0 = 3.1556e-04
Loss = 3.6265e-01, PNorm = 59.9444, GNorm = 1.4254, lr_0 = 3.1534e-04
Loss = 4.2743e-01, PNorm = 59.9440, GNorm = 1.5523, lr_0 = 3.1512e-04
Loss = 3.8162e-01, PNorm = 59.9480, GNorm = 1.5631, lr_0 = 3.1491e-04
Loss = 3.3656e-01, PNorm = 59.9539, GNorm = 1.5434, lr_0 = 3.1469e-04
Loss = 4.0005e-01, PNorm = 59.9567, GNorm = 1.7426, lr_0 = 3.1448e-04
Loss = 3.6158e-01, PNorm = 59.9611, GNorm = 1.3774, lr_0 = 3.1426e-04
Loss = 3.8947e-01, PNorm = 59.9630, GNorm = 0.9782, lr_0 = 3.1405e-04
Loss = 3.5667e-01, PNorm = 59.9671, GNorm = 1.2484, lr_0 = 3.1383e-04
Loss = 4.0727e-01, PNorm = 59.9729, GNorm = 2.0675, lr_0 = 3.1362e-04
Loss = 3.6789e-01, PNorm = 59.9777, GNorm = 1.7645, lr_0 = 3.1340e-04
Loss = 4.3887e-01, PNorm = 59.9820, GNorm = 2.0008, lr_0 = 3.1319e-04
Loss = 3.0559e-01, PNorm = 59.9847, GNorm = 1.5598, lr_0 = 3.1297e-04
Loss = 3.6896e-01, PNorm = 59.9900, GNorm = 1.7593, lr_0 = 3.1276e-04
Loss = 3.6425e-01, PNorm = 59.9950, GNorm = 1.2677, lr_0 = 3.1254e-04
Loss = 3.7058e-01, PNorm = 60.0004, GNorm = 1.1979, lr_0 = 3.1233e-04
Loss = 3.3153e-01, PNorm = 60.0029, GNorm = 1.8153, lr_0 = 3.1212e-04
Loss = 3.8807e-01, PNorm = 60.0045, GNorm = 1.5400, lr_0 = 3.1190e-04
Loss = 3.4768e-01, PNorm = 60.0128, GNorm = 1.1729, lr_0 = 3.1169e-04
Loss = 3.8371e-01, PNorm = 60.0162, GNorm = 1.3870, lr_0 = 3.1147e-04
Loss = 3.7473e-01, PNorm = 60.0243, GNorm = 1.5732, lr_0 = 3.1126e-04
Loss = 3.7143e-01, PNorm = 60.0292, GNorm = 1.2816, lr_0 = 3.1105e-04
Loss = 3.4921e-01, PNorm = 60.0372, GNorm = 1.3624, lr_0 = 3.1083e-04
Loss = 4.0224e-01, PNorm = 60.0396, GNorm = 1.1317, lr_0 = 3.1062e-04
Loss = 3.8778e-01, PNorm = 60.0456, GNorm = 1.5908, lr_0 = 3.1041e-04
Loss = 3.7250e-01, PNorm = 60.0514, GNorm = 1.6201, lr_0 = 3.1020e-04
Loss = 4.7008e-01, PNorm = 60.0550, GNorm = 1.0545, lr_0 = 3.0998e-04
Loss = 3.4104e-01, PNorm = 60.0571, GNorm = 1.1119, lr_0 = 3.0977e-04
Loss = 3.4802e-01, PNorm = 60.0626, GNorm = 1.9555, lr_0 = 3.0956e-04
Loss = 3.5019e-01, PNorm = 60.0680, GNorm = 1.1905, lr_0 = 3.0935e-04
Loss = 3.6250e-01, PNorm = 60.0761, GNorm = 1.2281, lr_0 = 3.0914e-04
Loss = 4.2925e-01, PNorm = 60.0786, GNorm = 1.4794, lr_0 = 3.0892e-04
Loss = 3.6298e-01, PNorm = 60.0837, GNorm = 1.2893, lr_0 = 3.0871e-04
Loss = 3.6585e-01, PNorm = 60.0837, GNorm = 1.7255, lr_0 = 3.0850e-04
Loss = 4.1218e-01, PNorm = 60.0834, GNorm = 1.3170, lr_0 = 3.0829e-04
Loss = 3.6251e-01, PNorm = 60.0852, GNorm = 1.6582, lr_0 = 3.0808e-04
Loss = 3.8048e-01, PNorm = 60.0910, GNorm = 1.2024, lr_0 = 3.0787e-04
Loss = 3.9830e-01, PNorm = 60.0914, GNorm = 1.1134, lr_0 = 3.0766e-04
Loss = 3.6467e-01, PNorm = 60.0972, GNorm = 1.2000, lr_0 = 3.0745e-04
Loss = 3.2062e-01, PNorm = 60.1006, GNorm = 1.1469, lr_0 = 3.0723e-04
Loss = 3.1029e-01, PNorm = 60.1004, GNorm = 1.0018, lr_0 = 3.0702e-04
Loss = 3.5841e-01, PNorm = 60.1029, GNorm = 1.3567, lr_0 = 3.0681e-04
Loss = 3.8648e-01, PNorm = 60.1077, GNorm = 1.9757, lr_0 = 3.0660e-04
Loss = 3.5904e-01, PNorm = 60.1098, GNorm = 1.1919, lr_0 = 3.0639e-04
Loss = 3.3525e-01, PNorm = 60.1164, GNorm = 1.5211, lr_0 = 3.0618e-04
Loss = 3.5748e-01, PNorm = 60.1234, GNorm = 1.7953, lr_0 = 3.0597e-04
Loss = 4.5446e-01, PNorm = 60.1297, GNorm = 1.7820, lr_0 = 3.0576e-04
Loss = 3.9427e-01, PNorm = 60.1338, GNorm = 1.8842, lr_0 = 3.0555e-04
Loss = 3.8020e-01, PNorm = 60.1395, GNorm = 1.8769, lr_0 = 3.0535e-04
Loss = 3.5099e-01, PNorm = 60.1420, GNorm = 2.2407, lr_0 = 3.0514e-04
Loss = 3.8678e-01, PNorm = 60.1463, GNorm = 1.3665, lr_0 = 3.0493e-04
Loss = 4.1153e-01, PNorm = 60.1502, GNorm = 1.6261, lr_0 = 3.0472e-04
Loss = 3.5480e-01, PNorm = 60.1557, GNorm = 1.1992, lr_0 = 3.0451e-04
Loss = 3.4242e-01, PNorm = 60.1561, GNorm = 1.6982, lr_0 = 3.0430e-04
Loss = 3.4895e-01, PNorm = 60.1595, GNorm = 1.2386, lr_0 = 3.0409e-04
Loss = 4.3185e-01, PNorm = 60.1651, GNorm = 1.4083, lr_0 = 3.0388e-04
Loss = 4.0064e-01, PNorm = 60.1708, GNorm = 1.4017, lr_0 = 3.0368e-04
Loss = 3.9183e-01, PNorm = 60.1750, GNorm = 1.3217, lr_0 = 3.0347e-04
Loss = 3.9573e-01, PNorm = 60.1801, GNorm = 1.3214, lr_0 = 3.0326e-04
Loss = 4.1799e-01, PNorm = 60.1811, GNorm = 1.2442, lr_0 = 3.0305e-04
Loss = 4.4284e-01, PNorm = 60.1876, GNorm = 1.3854, lr_0 = 3.0284e-04
Loss = 3.7247e-01, PNorm = 60.1936, GNorm = 1.0320, lr_0 = 3.0264e-04
Loss = 4.1219e-01, PNorm = 60.1963, GNorm = 1.2117, lr_0 = 3.0243e-04
Loss = 3.8461e-01, PNorm = 60.2001, GNorm = 1.3525, lr_0 = 3.0222e-04
Loss = 3.9674e-01, PNorm = 60.2015, GNorm = 1.5277, lr_0 = 3.0202e-04
Loss = 3.9163e-01, PNorm = 60.2033, GNorm = 1.3410, lr_0 = 3.0181e-04
Loss = 3.9269e-01, PNorm = 60.2110, GNorm = 2.3063, lr_0 = 3.0160e-04
Loss = 3.5418e-01, PNorm = 60.2158, GNorm = 1.7540, lr_0 = 3.0140e-04
Loss = 3.4727e-01, PNorm = 60.2182, GNorm = 1.3756, lr_0 = 3.0119e-04
Loss = 3.5648e-01, PNorm = 60.2251, GNorm = 1.6878, lr_0 = 3.0098e-04
Loss = 3.6929e-01, PNorm = 60.2290, GNorm = 1.5563, lr_0 = 3.0078e-04
Loss = 4.0382e-01, PNorm = 60.2353, GNorm = 2.4670, lr_0 = 3.0057e-04
Loss = 4.5759e-01, PNorm = 60.2412, GNorm = 1.7849, lr_0 = 3.0036e-04
Loss = 3.8664e-01, PNorm = 60.2508, GNorm = 2.6157, lr_0 = 3.0016e-04
Loss = 3.7188e-01, PNorm = 60.2587, GNorm = 1.0946, lr_0 = 2.9995e-04
Loss = 3.7015e-01, PNorm = 60.2600, GNorm = 1.3013, lr_0 = 2.9975e-04
Loss = 4.1598e-01, PNorm = 60.2625, GNorm = 1.3386, lr_0 = 2.9954e-04
Loss = 3.9911e-01, PNorm = 60.2650, GNorm = 1.5326, lr_0 = 2.9934e-04
Loss = 4.0391e-01, PNorm = 60.2671, GNorm = 1.0598, lr_0 = 2.9913e-04
Loss = 3.8983e-01, PNorm = 60.2672, GNorm = 1.8808, lr_0 = 2.9893e-04
Loss = 3.5765e-01, PNorm = 60.2709, GNorm = 1.1041, lr_0 = 2.9872e-04
Loss = 3.3344e-01, PNorm = 60.2744, GNorm = 1.0509, lr_0 = 2.9852e-04
Loss = 3.8299e-01, PNorm = 60.2769, GNorm = 1.5336, lr_0 = 2.9831e-04
Loss = 4.1804e-01, PNorm = 60.2752, GNorm = 1.4021, lr_0 = 2.9811e-04
Loss = 3.9003e-01, PNorm = 60.2786, GNorm = 1.5762, lr_0 = 2.9790e-04
Loss = 3.5616e-01, PNorm = 60.2830, GNorm = 1.0882, lr_0 = 2.9770e-04
Loss = 3.4436e-01, PNorm = 60.2894, GNorm = 1.7877, lr_0 = 2.9750e-04
Loss = 4.0780e-01, PNorm = 60.2934, GNorm = 1.3545, lr_0 = 2.9729e-04
Loss = 3.4048e-01, PNorm = 60.2979, GNorm = 1.3502, lr_0 = 2.9709e-04
Loss = 4.0419e-01, PNorm = 60.3002, GNorm = 1.4646, lr_0 = 2.9689e-04
Loss = 3.3784e-01, PNorm = 60.3029, GNorm = 1.6671, lr_0 = 2.9668e-04
Loss = 3.9640e-01, PNorm = 60.3036, GNorm = 1.3665, lr_0 = 2.9648e-04
Loss = 4.1755e-01, PNorm = 60.3086, GNorm = 1.3578, lr_0 = 2.9628e-04
Loss = 4.1156e-01, PNorm = 60.3115, GNorm = 1.7763, lr_0 = 2.9607e-04
Loss = 4.3319e-01, PNorm = 60.3134, GNorm = 1.3465, lr_0 = 2.9587e-04
Loss = 4.0302e-01, PNorm = 60.3157, GNorm = 1.5611, lr_0 = 2.9567e-04
Loss = 3.6984e-01, PNorm = 60.3179, GNorm = 1.7386, lr_0 = 2.9546e-04
Loss = 4.2133e-01, PNorm = 60.3162, GNorm = 1.1315, lr_0 = 2.9526e-04
Loss = 3.4140e-01, PNorm = 60.3190, GNorm = 1.0091, lr_0 = 2.9506e-04
Loss = 3.6635e-01, PNorm = 60.3247, GNorm = 1.0656, lr_0 = 2.9486e-04
Loss = 4.0839e-01, PNorm = 60.3255, GNorm = 1.2533, lr_0 = 2.9466e-04
Loss = 3.9062e-01, PNorm = 60.3307, GNorm = 1.7273, lr_0 = 2.9445e-04
Loss = 3.7283e-01, PNorm = 60.3299, GNorm = 1.5603, lr_0 = 2.9425e-04
Loss = 3.3656e-01, PNorm = 60.3348, GNorm = 0.8542, lr_0 = 2.9405e-04
Loss = 3.9968e-01, PNorm = 60.3374, GNorm = 1.4877, lr_0 = 2.9385e-04
Loss = 4.0445e-01, PNorm = 60.3395, GNorm = 1.4986, lr_0 = 2.9365e-04
Loss = 3.8433e-01, PNorm = 60.3462, GNorm = 0.8880, lr_0 = 2.9345e-04
Loss = 3.7643e-01, PNorm = 60.3487, GNorm = 1.5916, lr_0 = 2.9325e-04
Loss = 3.8022e-01, PNorm = 60.3509, GNorm = 1.2293, lr_0 = 2.9305e-04
Loss = 4.2606e-01, PNorm = 60.3560, GNorm = 1.6057, lr_0 = 2.9284e-04
Loss = 3.2708e-01, PNorm = 60.3571, GNorm = 0.8536, lr_0 = 2.9264e-04
Loss = 3.8186e-01, PNorm = 60.3613, GNorm = 1.9938, lr_0 = 2.9244e-04
Loss = 3.8755e-01, PNorm = 60.3611, GNorm = 0.9105, lr_0 = 2.9224e-04
Loss = 3.4063e-01, PNorm = 60.3605, GNorm = 1.2783, lr_0 = 2.9204e-04
Loss = 3.9065e-01, PNorm = 60.3640, GNorm = 1.2020, lr_0 = 2.9184e-04
Loss = 3.7660e-01, PNorm = 60.3659, GNorm = 2.3483, lr_0 = 2.9164e-04
Loss = 3.6383e-01, PNorm = 60.3675, GNorm = 1.4808, lr_0 = 2.9144e-04
Loss = 4.0429e-01, PNorm = 60.3721, GNorm = 1.2792, lr_0 = 2.9124e-04
Validation mae = 0.112095
Epoch 17
Loss = 3.6734e-01, PNorm = 60.3775, GNorm = 1.2208, lr_0 = 2.9104e-04
Loss = 3.7266e-01, PNorm = 60.3816, GNorm = 1.5587, lr_0 = 2.9084e-04
Loss = 3.4101e-01, PNorm = 60.3843, GNorm = 1.2662, lr_0 = 2.9065e-04
Loss = 3.3343e-01, PNorm = 60.3885, GNorm = 1.2940, lr_0 = 2.9045e-04
Loss = 3.6967e-01, PNorm = 60.3932, GNorm = 1.4263, lr_0 = 2.9025e-04
Loss = 3.8777e-01, PNorm = 60.3992, GNorm = 1.4213, lr_0 = 2.9005e-04
Loss = 3.7823e-01, PNorm = 60.4048, GNorm = 1.2998, lr_0 = 2.8985e-04
Loss = 3.5073e-01, PNorm = 60.4085, GNorm = 1.3020, lr_0 = 2.8965e-04
Loss = 3.1146e-01, PNorm = 60.4150, GNorm = 1.5789, lr_0 = 2.8945e-04
Loss = 3.6314e-01, PNorm = 60.4165, GNorm = 1.4546, lr_0 = 2.8925e-04
Loss = 3.8911e-01, PNorm = 60.4213, GNorm = 1.7828, lr_0 = 2.8906e-04
Loss = 4.0422e-01, PNorm = 60.4250, GNorm = 1.4780, lr_0 = 2.8886e-04
Loss = 4.0191e-01, PNorm = 60.4310, GNorm = 1.3482, lr_0 = 2.8866e-04
Loss = 4.0858e-01, PNorm = 60.4370, GNorm = 2.1263, lr_0 = 2.8846e-04
Loss = 3.9982e-01, PNorm = 60.4378, GNorm = 1.6444, lr_0 = 2.8826e-04
Loss = 3.7937e-01, PNorm = 60.4441, GNorm = 1.5607, lr_0 = 2.8807e-04
Loss = 3.3693e-01, PNorm = 60.4464, GNorm = 1.4030, lr_0 = 2.8787e-04
Loss = 3.9800e-01, PNorm = 60.4508, GNorm = 1.2643, lr_0 = 2.8767e-04
Loss = 3.1773e-01, PNorm = 60.4599, GNorm = 1.2269, lr_0 = 2.8748e-04
Loss = 4.1008e-01, PNorm = 60.4636, GNorm = 1.1061, lr_0 = 2.8728e-04
Loss = 3.7126e-01, PNorm = 60.4658, GNorm = 1.4175, lr_0 = 2.8708e-04
Loss = 3.8901e-01, PNorm = 60.4695, GNorm = 1.4438, lr_0 = 2.8689e-04
Loss = 4.1081e-01, PNorm = 60.4796, GNorm = 1.4798, lr_0 = 2.8669e-04
Loss = 3.9640e-01, PNorm = 60.4856, GNorm = 1.4986, lr_0 = 2.8649e-04
Loss = 4.0665e-01, PNorm = 60.4850, GNorm = 1.3041, lr_0 = 2.8630e-04
Loss = 3.7067e-01, PNorm = 60.4883, GNorm = 1.5129, lr_0 = 2.8610e-04
Loss = 4.3436e-01, PNorm = 60.4931, GNorm = 1.5131, lr_0 = 2.8590e-04
Loss = 3.5598e-01, PNorm = 60.4979, GNorm = 1.6231, lr_0 = 2.8571e-04
Loss = 4.3964e-01, PNorm = 60.5038, GNorm = 1.3757, lr_0 = 2.8551e-04
Loss = 4.2772e-01, PNorm = 60.5117, GNorm = 1.5597, lr_0 = 2.8532e-04
Loss = 3.8098e-01, PNorm = 60.5139, GNorm = 0.9341, lr_0 = 2.8512e-04
Loss = 3.8962e-01, PNorm = 60.5184, GNorm = 1.2251, lr_0 = 2.8493e-04
Loss = 3.7554e-01, PNorm = 60.5204, GNorm = 1.4662, lr_0 = 2.8473e-04
Loss = 3.7860e-01, PNorm = 60.5239, GNorm = 1.2234, lr_0 = 2.8454e-04
Loss = 3.5646e-01, PNorm = 60.5268, GNorm = 1.3803, lr_0 = 2.8434e-04
Loss = 3.5979e-01, PNorm = 60.5279, GNorm = 1.2737, lr_0 = 2.8415e-04
Loss = 3.3872e-01, PNorm = 60.5275, GNorm = 1.7236, lr_0 = 2.8395e-04
Loss = 3.9344e-01, PNorm = 60.5290, GNorm = 1.5714, lr_0 = 2.8376e-04
Loss = 3.4394e-01, PNorm = 60.5304, GNorm = 1.4859, lr_0 = 2.8356e-04
Loss = 3.8266e-01, PNorm = 60.5335, GNorm = 1.2671, lr_0 = 2.8337e-04
Loss = 3.7260e-01, PNorm = 60.5386, GNorm = 1.4271, lr_0 = 2.8317e-04
Loss = 3.8312e-01, PNorm = 60.5434, GNorm = 1.3197, lr_0 = 2.8298e-04
Loss = 3.8933e-01, PNorm = 60.5511, GNorm = 1.3687, lr_0 = 2.8279e-04
Loss = 3.5290e-01, PNorm = 60.5547, GNorm = 1.2726, lr_0 = 2.8259e-04
Loss = 3.7832e-01, PNorm = 60.5564, GNorm = 1.5829, lr_0 = 2.8240e-04
Loss = 3.5224e-01, PNorm = 60.5577, GNorm = 1.7389, lr_0 = 2.8221e-04
Loss = 3.5663e-01, PNorm = 60.5617, GNorm = 1.0919, lr_0 = 2.8201e-04
Loss = 3.6118e-01, PNorm = 60.5700, GNorm = 1.4203, lr_0 = 2.8182e-04
Loss = 3.6102e-01, PNorm = 60.5738, GNorm = 1.2112, lr_0 = 2.8163e-04
Loss = 4.2364e-01, PNorm = 60.5785, GNorm = 1.3349, lr_0 = 2.8143e-04
Loss = 3.8408e-01, PNorm = 60.5810, GNorm = 1.5174, lr_0 = 2.8124e-04
Loss = 3.5727e-01, PNorm = 60.5818, GNorm = 1.5172, lr_0 = 2.8105e-04
Loss = 4.0372e-01, PNorm = 60.5888, GNorm = 1.3691, lr_0 = 2.8085e-04
Loss = 3.9251e-01, PNorm = 60.5930, GNorm = 1.4692, lr_0 = 2.8066e-04
Loss = 4.1612e-01, PNorm = 60.5922, GNorm = 1.6511, lr_0 = 2.8047e-04
Loss = 4.1639e-01, PNorm = 60.5908, GNorm = 1.4274, lr_0 = 2.8028e-04
Loss = 3.7079e-01, PNorm = 60.5912, GNorm = 1.1331, lr_0 = 2.8009e-04
Loss = 4.1055e-01, PNorm = 60.5925, GNorm = 1.2031, lr_0 = 2.7989e-04
Loss = 3.6656e-01, PNorm = 60.5919, GNorm = 1.3701, lr_0 = 2.7970e-04
Loss = 3.4359e-01, PNorm = 60.5940, GNorm = 1.3404, lr_0 = 2.7951e-04
Loss = 3.5074e-01, PNorm = 60.5986, GNorm = 0.9146, lr_0 = 2.7932e-04
Loss = 4.0366e-01, PNorm = 60.6005, GNorm = 1.5391, lr_0 = 2.7913e-04
Loss = 3.9954e-01, PNorm = 60.6049, GNorm = 1.7645, lr_0 = 2.7894e-04
Loss = 3.6595e-01, PNorm = 60.6070, GNorm = 1.4880, lr_0 = 2.7875e-04
Loss = 3.6456e-01, PNorm = 60.6099, GNorm = 1.2908, lr_0 = 2.7855e-04
Loss = 3.8781e-01, PNorm = 60.6174, GNorm = 1.6261, lr_0 = 2.7836e-04
Loss = 3.4755e-01, PNorm = 60.6217, GNorm = 1.4760, lr_0 = 2.7817e-04
Loss = 3.4236e-01, PNorm = 60.6231, GNorm = 1.4925, lr_0 = 2.7798e-04
Loss = 3.4761e-01, PNorm = 60.6237, GNorm = 1.9025, lr_0 = 2.7779e-04
Loss = 3.4470e-01, PNorm = 60.6242, GNorm = 1.0182, lr_0 = 2.7760e-04
Loss = 3.8238e-01, PNorm = 60.6275, GNorm = 1.5142, lr_0 = 2.7741e-04
Loss = 4.3099e-01, PNorm = 60.6328, GNorm = 1.4804, lr_0 = 2.7722e-04
Loss = 4.1641e-01, PNorm = 60.6354, GNorm = 1.6619, lr_0 = 2.7703e-04
Loss = 3.5098e-01, PNorm = 60.6373, GNorm = 1.2270, lr_0 = 2.7684e-04
Loss = 3.2992e-01, PNorm = 60.6387, GNorm = 1.4943, lr_0 = 2.7665e-04
Loss = 3.5117e-01, PNorm = 60.6406, GNorm = 1.3889, lr_0 = 2.7646e-04
Loss = 3.3499e-01, PNorm = 60.6450, GNorm = 1.6919, lr_0 = 2.7627e-04
Loss = 3.6578e-01, PNorm = 60.6478, GNorm = 1.1739, lr_0 = 2.7608e-04
Loss = 3.5324e-01, PNorm = 60.6492, GNorm = 1.3270, lr_0 = 2.7590e-04
Loss = 4.2648e-01, PNorm = 60.6504, GNorm = 1.7519, lr_0 = 2.7571e-04
Loss = 3.4816e-01, PNorm = 60.6509, GNorm = 1.4723, lr_0 = 2.7552e-04
Loss = 3.6700e-01, PNorm = 60.6532, GNorm = 1.7558, lr_0 = 2.7533e-04
Loss = 4.2974e-01, PNorm = 60.6600, GNorm = 1.6415, lr_0 = 2.7514e-04
Loss = 3.4923e-01, PNorm = 60.6678, GNorm = 1.6042, lr_0 = 2.7495e-04
Loss = 3.8352e-01, PNorm = 60.6688, GNorm = 1.4270, lr_0 = 2.7476e-04
Loss = 3.7787e-01, PNorm = 60.6728, GNorm = 1.4966, lr_0 = 2.7457e-04
Loss = 3.8753e-01, PNorm = 60.6769, GNorm = 1.8319, lr_0 = 2.7439e-04
Loss = 4.2974e-01, PNorm = 60.6799, GNorm = 1.3101, lr_0 = 2.7420e-04
Loss = 3.4338e-01, PNorm = 60.6806, GNorm = 1.4556, lr_0 = 2.7401e-04
Loss = 3.6093e-01, PNorm = 60.6805, GNorm = 1.4653, lr_0 = 2.7382e-04
Loss = 3.5995e-01, PNorm = 60.6835, GNorm = 1.0399, lr_0 = 2.7364e-04
Loss = 4.3822e-01, PNorm = 60.6862, GNorm = 1.5761, lr_0 = 2.7345e-04
Loss = 3.6199e-01, PNorm = 60.6912, GNorm = 1.3462, lr_0 = 2.7326e-04
Loss = 3.3308e-01, PNorm = 60.6957, GNorm = 1.4817, lr_0 = 2.7307e-04
Loss = 4.2218e-01, PNorm = 60.6990, GNorm = 1.1054, lr_0 = 2.7289e-04
Loss = 3.4008e-01, PNorm = 60.7057, GNorm = 1.1609, lr_0 = 2.7270e-04
Loss = 3.8538e-01, PNorm = 60.7089, GNorm = 1.4197, lr_0 = 2.7251e-04
Loss = 3.8679e-01, PNorm = 60.7106, GNorm = 1.3919, lr_0 = 2.7233e-04
Loss = 3.5283e-01, PNorm = 60.7124, GNorm = 1.4632, lr_0 = 2.7214e-04
Loss = 3.3152e-01, PNorm = 60.7175, GNorm = 1.2928, lr_0 = 2.7195e-04
Loss = 3.0991e-01, PNorm = 60.7226, GNorm = 1.1608, lr_0 = 2.7177e-04
Loss = 3.6373e-01, PNorm = 60.7250, GNorm = 1.4447, lr_0 = 2.7158e-04
Loss = 3.4814e-01, PNorm = 60.7279, GNorm = 1.2556, lr_0 = 2.7139e-04
Loss = 3.5145e-01, PNorm = 60.7309, GNorm = 0.9966, lr_0 = 2.7121e-04
Loss = 3.6227e-01, PNorm = 60.7322, GNorm = 1.1358, lr_0 = 2.7102e-04
Loss = 4.0602e-01, PNorm = 60.7337, GNorm = 1.7363, lr_0 = 2.7084e-04
Loss = 4.5026e-01, PNorm = 60.7382, GNorm = 1.5287, lr_0 = 2.7065e-04
Loss = 3.6143e-01, PNorm = 60.7413, GNorm = 1.7849, lr_0 = 2.7047e-04
Loss = 3.1111e-01, PNorm = 60.7418, GNorm = 1.1186, lr_0 = 2.7028e-04
Loss = 3.8425e-01, PNorm = 60.7440, GNorm = 1.3916, lr_0 = 2.7010e-04
Loss = 3.3851e-01, PNorm = 60.7465, GNorm = 1.1307, lr_0 = 2.6991e-04
Loss = 3.9649e-01, PNorm = 60.7516, GNorm = 1.1733, lr_0 = 2.6973e-04
Loss = 3.8478e-01, PNorm = 60.7524, GNorm = 1.4434, lr_0 = 2.6954e-04
Loss = 4.0069e-01, PNorm = 60.7545, GNorm = 1.2816, lr_0 = 2.6936e-04
Loss = 3.4776e-01, PNorm = 60.7595, GNorm = 1.7711, lr_0 = 2.6917e-04
Loss = 4.4465e-01, PNorm = 60.7600, GNorm = 1.7541, lr_0 = 2.6899e-04
Loss = 3.5466e-01, PNorm = 60.7622, GNorm = 1.3653, lr_0 = 2.6880e-04
Loss = 4.2048e-01, PNorm = 60.7630, GNorm = 1.8623, lr_0 = 2.6862e-04
Loss = 3.9394e-01, PNorm = 60.7683, GNorm = 1.6585, lr_0 = 2.6844e-04
Loss = 3.4335e-01, PNorm = 60.7710, GNorm = 1.7102, lr_0 = 2.6825e-04
Validation mae = 0.112461
Epoch 18
Loss = 3.8183e-01, PNorm = 60.7719, GNorm = 1.3203, lr_0 = 2.6807e-04
Loss = 3.3083e-01, PNorm = 60.7750, GNorm = 1.4148, lr_0 = 2.6788e-04
Loss = 3.3070e-01, PNorm = 60.7780, GNorm = 1.2250, lr_0 = 2.6770e-04
Loss = 4.1218e-01, PNorm = 60.7818, GNorm = 1.4581, lr_0 = 2.6752e-04
Loss = 3.4725e-01, PNorm = 60.7892, GNorm = 2.2492, lr_0 = 2.6733e-04
Loss = 3.9193e-01, PNorm = 60.7898, GNorm = 1.7331, lr_0 = 2.6715e-04
Loss = 4.1161e-01, PNorm = 60.7943, GNorm = 1.4680, lr_0 = 2.6697e-04
Loss = 3.5650e-01, PNorm = 60.7950, GNorm = 1.1554, lr_0 = 2.6678e-04
Loss = 3.6402e-01, PNorm = 60.7963, GNorm = 1.6170, lr_0 = 2.6660e-04
Loss = 3.5884e-01, PNorm = 60.8036, GNorm = 1.5496, lr_0 = 2.6642e-04
Loss = 3.0666e-01, PNorm = 60.8082, GNorm = 1.4338, lr_0 = 2.6624e-04
Loss = 3.8620e-01, PNorm = 60.8076, GNorm = 1.7232, lr_0 = 2.6605e-04
Loss = 4.1578e-01, PNorm = 60.8133, GNorm = 1.6759, lr_0 = 2.6587e-04
Loss = 4.0339e-01, PNorm = 60.8159, GNorm = 1.7821, lr_0 = 2.6569e-04
Loss = 3.9514e-01, PNorm = 60.8230, GNorm = 1.9200, lr_0 = 2.6551e-04
Loss = 3.8638e-01, PNorm = 60.8211, GNorm = 1.1104, lr_0 = 2.6533e-04
Loss = 4.3532e-01, PNorm = 60.8270, GNorm = 1.5566, lr_0 = 2.6514e-04
Loss = 3.9357e-01, PNorm = 60.8281, GNorm = 1.5040, lr_0 = 2.6496e-04
Loss = 3.8888e-01, PNorm = 60.8293, GNorm = 1.5496, lr_0 = 2.6478e-04
Loss = 3.7287e-01, PNorm = 60.8338, GNorm = 1.6811, lr_0 = 2.6460e-04
Loss = 3.8600e-01, PNorm = 60.8388, GNorm = 1.7231, lr_0 = 2.6442e-04
Loss = 4.2190e-01, PNorm = 60.8410, GNorm = 1.8061, lr_0 = 2.6424e-04
Loss = 3.5086e-01, PNorm = 60.8453, GNorm = 1.4784, lr_0 = 2.6406e-04
Loss = 3.6729e-01, PNorm = 60.8506, GNorm = 1.8979, lr_0 = 2.6388e-04
Loss = 3.6033e-01, PNorm = 60.8540, GNorm = 1.4216, lr_0 = 2.6369e-04
Loss = 3.7556e-01, PNorm = 60.8575, GNorm = 2.2303, lr_0 = 2.6351e-04
Loss = 3.9416e-01, PNorm = 60.8629, GNorm = 1.7845, lr_0 = 2.6333e-04
Loss = 3.6660e-01, PNorm = 60.8656, GNorm = 1.4510, lr_0 = 2.6315e-04
Loss = 3.5702e-01, PNorm = 60.8721, GNorm = 0.9097, lr_0 = 2.6297e-04
Loss = 4.0233e-01, PNorm = 60.8746, GNorm = 1.4895, lr_0 = 2.6279e-04
Loss = 3.8482e-01, PNorm = 60.8742, GNorm = 1.7227, lr_0 = 2.6261e-04
Loss = 3.2271e-01, PNorm = 60.8772, GNorm = 1.4708, lr_0 = 2.6243e-04
Loss = 3.5536e-01, PNorm = 60.8807, GNorm = 1.5762, lr_0 = 2.6225e-04
Loss = 3.6924e-01, PNorm = 60.8809, GNorm = 1.2392, lr_0 = 2.6207e-04
Loss = 3.6498e-01, PNorm = 60.8836, GNorm = 1.4750, lr_0 = 2.6189e-04
Loss = 3.8926e-01, PNorm = 60.8860, GNorm = 1.6146, lr_0 = 2.6171e-04
Loss = 4.1262e-01, PNorm = 60.8889, GNorm = 1.9957, lr_0 = 2.6153e-04
Loss = 3.6002e-01, PNorm = 60.8940, GNorm = 1.1421, lr_0 = 2.6136e-04
Loss = 3.4442e-01, PNorm = 60.8976, GNorm = 1.0340, lr_0 = 2.6118e-04
Loss = 3.4147e-01, PNorm = 60.8975, GNorm = 1.6544, lr_0 = 2.6100e-04
Loss = 3.5673e-01, PNorm = 60.8977, GNorm = 2.1455, lr_0 = 2.6082e-04
Loss = 3.4672e-01, PNorm = 60.9027, GNorm = 1.1013, lr_0 = 2.6064e-04
Loss = 4.1041e-01, PNorm = 60.9045, GNorm = 1.7246, lr_0 = 2.6046e-04
Loss = 3.6907e-01, PNorm = 60.9094, GNorm = 1.4968, lr_0 = 2.6028e-04
Loss = 3.3172e-01, PNorm = 60.9126, GNorm = 1.0614, lr_0 = 2.6011e-04
Loss = 4.1241e-01, PNorm = 60.9153, GNorm = 1.3400, lr_0 = 2.5993e-04
Loss = 2.9625e-01, PNorm = 60.9206, GNorm = 0.9383, lr_0 = 2.5975e-04
Loss = 3.7789e-01, PNorm = 60.9243, GNorm = 1.8809, lr_0 = 2.5957e-04
Loss = 3.3918e-01, PNorm = 60.9278, GNorm = 1.2068, lr_0 = 2.5939e-04
Loss = 3.5893e-01, PNorm = 60.9299, GNorm = 1.3974, lr_0 = 2.5922e-04
Loss = 3.9377e-01, PNorm = 60.9385, GNorm = 1.6594, lr_0 = 2.5904e-04
Loss = 3.5104e-01, PNorm = 60.9450, GNorm = 1.1551, lr_0 = 2.5886e-04
Loss = 4.9867e-01, PNorm = 60.9485, GNorm = 1.3067, lr_0 = 2.5868e-04
Loss = 3.5770e-01, PNorm = 60.9516, GNorm = 1.0721, lr_0 = 2.5851e-04
Loss = 4.2816e-01, PNorm = 60.9558, GNorm = 1.4572, lr_0 = 2.5833e-04
Loss = 4.0229e-01, PNorm = 60.9573, GNorm = 1.2255, lr_0 = 2.5815e-04
Loss = 3.9833e-01, PNorm = 60.9571, GNorm = 1.3566, lr_0 = 2.5797e-04
Loss = 3.4049e-01, PNorm = 60.9537, GNorm = 0.9696, lr_0 = 2.5780e-04
Loss = 4.2716e-01, PNorm = 60.9540, GNorm = 1.7920, lr_0 = 2.5762e-04
Loss = 3.9219e-01, PNorm = 60.9560, GNorm = 1.1448, lr_0 = 2.5745e-04
Loss = 3.6708e-01, PNorm = 60.9620, GNorm = 1.6808, lr_0 = 2.5727e-04
Loss = 3.6540e-01, PNorm = 60.9642, GNorm = 1.6017, lr_0 = 2.5709e-04
Loss = 4.1440e-01, PNorm = 60.9653, GNorm = 1.6726, lr_0 = 2.5692e-04
Loss = 3.6326e-01, PNorm = 60.9643, GNorm = 1.2742, lr_0 = 2.5674e-04
Loss = 3.2569e-01, PNorm = 60.9690, GNorm = 1.5098, lr_0 = 2.5656e-04
Loss = 3.2709e-01, PNorm = 60.9736, GNorm = 1.0422, lr_0 = 2.5639e-04
Loss = 3.9141e-01, PNorm = 60.9731, GNorm = 1.4838, lr_0 = 2.5621e-04
Loss = 3.5290e-01, PNorm = 60.9752, GNorm = 1.1088, lr_0 = 2.5604e-04
Loss = 3.6115e-01, PNorm = 60.9774, GNorm = 1.9497, lr_0 = 2.5586e-04
Loss = 3.5755e-01, PNorm = 60.9846, GNorm = 1.3649, lr_0 = 2.5569e-04
Loss = 3.5813e-01, PNorm = 60.9868, GNorm = 1.5987, lr_0 = 2.5551e-04
Loss = 3.5446e-01, PNorm = 60.9946, GNorm = 1.6103, lr_0 = 2.5534e-04
Loss = 3.9455e-01, PNorm = 60.9945, GNorm = 1.0900, lr_0 = 2.5516e-04
Loss = 2.9229e-01, PNorm = 60.9946, GNorm = 1.2217, lr_0 = 2.5499e-04
Loss = 3.9425e-01, PNorm = 60.9992, GNorm = 1.6989, lr_0 = 2.5481e-04
Loss = 3.3649e-01, PNorm = 61.0029, GNorm = 1.7772, lr_0 = 2.5464e-04
Loss = 3.8980e-01, PNorm = 61.0047, GNorm = 1.5700, lr_0 = 2.5446e-04
Loss = 3.4708e-01, PNorm = 61.0056, GNorm = 1.3368, lr_0 = 2.5429e-04
Loss = 3.8530e-01, PNorm = 61.0094, GNorm = 1.2710, lr_0 = 2.5411e-04
Loss = 4.0414e-01, PNorm = 61.0119, GNorm = 1.2752, lr_0 = 2.5394e-04
Loss = 3.5757e-01, PNorm = 61.0137, GNorm = 1.3671, lr_0 = 2.5377e-04
Loss = 3.3630e-01, PNorm = 61.0169, GNorm = 2.0371, lr_0 = 2.5359e-04
Loss = 3.4794e-01, PNorm = 61.0194, GNorm = 1.3818, lr_0 = 2.5342e-04
Loss = 3.6311e-01, PNorm = 61.0215, GNorm = 1.5366, lr_0 = 2.5325e-04
Loss = 3.5094e-01, PNorm = 61.0233, GNorm = 1.7732, lr_0 = 2.5307e-04
Loss = 3.4672e-01, PNorm = 61.0229, GNorm = 1.3005, lr_0 = 2.5290e-04
Loss = 3.4512e-01, PNorm = 61.0233, GNorm = 0.7985, lr_0 = 2.5273e-04
Loss = 3.3570e-01, PNorm = 61.0291, GNorm = 1.5452, lr_0 = 2.5255e-04
Loss = 3.5893e-01, PNorm = 61.0345, GNorm = 1.2081, lr_0 = 2.5238e-04
Loss = 3.3642e-01, PNorm = 61.0389, GNorm = 1.1054, lr_0 = 2.5221e-04
Loss = 3.6396e-01, PNorm = 61.0406, GNorm = 2.1972, lr_0 = 2.5203e-04
Loss = 3.7096e-01, PNorm = 61.0487, GNorm = 1.0410, lr_0 = 2.5186e-04
Loss = 4.4114e-01, PNorm = 61.0512, GNorm = 1.2493, lr_0 = 2.5169e-04
Loss = 4.2432e-01, PNorm = 61.0546, GNorm = 2.5152, lr_0 = 2.5152e-04
Loss = 3.6168e-01, PNorm = 61.0579, GNorm = 1.5859, lr_0 = 2.5134e-04
Loss = 3.7344e-01, PNorm = 61.0541, GNorm = 1.2804, lr_0 = 2.5117e-04
Loss = 3.6842e-01, PNorm = 61.0583, GNorm = 1.3690, lr_0 = 2.5100e-04
Loss = 3.7466e-01, PNorm = 61.0630, GNorm = 1.1763, lr_0 = 2.5083e-04
Loss = 4.2982e-01, PNorm = 61.0663, GNorm = 1.6762, lr_0 = 2.5066e-04
Loss = 3.2746e-01, PNorm = 61.0675, GNorm = 1.2410, lr_0 = 2.5048e-04
Loss = 3.9013e-01, PNorm = 61.0711, GNorm = 1.0041, lr_0 = 2.5031e-04
Loss = 3.3997e-01, PNorm = 61.0765, GNorm = 1.1555, lr_0 = 2.5014e-04
Loss = 3.6585e-01, PNorm = 61.0790, GNorm = 1.4997, lr_0 = 2.4997e-04
Loss = 3.7513e-01, PNorm = 61.0828, GNorm = 1.1689, lr_0 = 2.4980e-04
Loss = 3.6747e-01, PNorm = 61.0845, GNorm = 1.6373, lr_0 = 2.4963e-04
Loss = 3.8255e-01, PNorm = 61.0897, GNorm = 1.7440, lr_0 = 2.4946e-04
Loss = 3.8293e-01, PNorm = 61.0922, GNorm = 2.2076, lr_0 = 2.4929e-04
Loss = 3.4701e-01, PNorm = 61.0936, GNorm = 1.3265, lr_0 = 2.4911e-04
Loss = 4.3048e-01, PNorm = 61.0976, GNorm = 1.7781, lr_0 = 2.4894e-04
Loss = 4.0454e-01, PNorm = 61.1016, GNorm = 1.2070, lr_0 = 2.4877e-04
Loss = 4.1464e-01, PNorm = 61.1018, GNorm = 1.5480, lr_0 = 2.4860e-04
Loss = 4.0772e-01, PNorm = 61.1068, GNorm = 1.8600, lr_0 = 2.4843e-04
Loss = 4.2403e-01, PNorm = 61.1095, GNorm = 2.0777, lr_0 = 2.4826e-04
Loss = 4.0968e-01, PNorm = 61.1144, GNorm = 1.6403, lr_0 = 2.4809e-04
Loss = 3.4597e-01, PNorm = 61.1173, GNorm = 1.0604, lr_0 = 2.4792e-04
Loss = 3.5462e-01, PNorm = 61.1193, GNorm = 1.2364, lr_0 = 2.4775e-04
Loss = 3.2882e-01, PNorm = 61.1199, GNorm = 1.4646, lr_0 = 2.4758e-04
Loss = 3.6956e-01, PNorm = 61.1206, GNorm = 1.3441, lr_0 = 2.4741e-04
Loss = 4.0405e-01, PNorm = 61.1274, GNorm = 2.1439, lr_0 = 2.4724e-04
Loss = 3.4494e-01, PNorm = 61.1325, GNorm = 1.1709, lr_0 = 2.4707e-04
Validation mae = 0.112125
Epoch 19
Loss = 3.8234e-01, PNorm = 61.1335, GNorm = 1.5201, lr_0 = 2.4690e-04
Loss = 3.6509e-01, PNorm = 61.1394, GNorm = 2.2101, lr_0 = 2.4674e-04
Loss = 3.8553e-01, PNorm = 61.1409, GNorm = 1.2328, lr_0 = 2.4657e-04
Loss = 2.9866e-01, PNorm = 61.1424, GNorm = 1.0055, lr_0 = 2.4640e-04
Loss = 3.8345e-01, PNorm = 61.1445, GNorm = 2.0964, lr_0 = 2.4623e-04
Loss = 3.8982e-01, PNorm = 61.1449, GNorm = 1.5531, lr_0 = 2.4606e-04
Loss = 3.6170e-01, PNorm = 61.1501, GNorm = 1.0659, lr_0 = 2.4589e-04
Loss = 3.7238e-01, PNorm = 61.1559, GNorm = 1.7386, lr_0 = 2.4572e-04
Loss = 3.6672e-01, PNorm = 61.1589, GNorm = 1.9429, lr_0 = 2.4556e-04
Loss = 3.3137e-01, PNorm = 61.1613, GNorm = 2.6763, lr_0 = 2.4539e-04
Loss = 3.3723e-01, PNorm = 61.1637, GNorm = 1.2377, lr_0 = 2.4522e-04
Loss = 3.7769e-01, PNorm = 61.1710, GNorm = 1.3949, lr_0 = 2.4505e-04
Loss = 3.7737e-01, PNorm = 61.1756, GNorm = 1.6658, lr_0 = 2.4488e-04
Loss = 3.5737e-01, PNorm = 61.1760, GNorm = 1.0994, lr_0 = 2.4472e-04
Loss = 3.7320e-01, PNorm = 61.1791, GNorm = 1.2598, lr_0 = 2.4455e-04
Loss = 3.5204e-01, PNorm = 61.1842, GNorm = 1.2697, lr_0 = 2.4438e-04
Loss = 4.0286e-01, PNorm = 61.1892, GNorm = 1.6313, lr_0 = 2.4421e-04
Loss = 3.8434e-01, PNorm = 61.1897, GNorm = 1.3137, lr_0 = 2.4405e-04
Loss = 4.0209e-01, PNorm = 61.1897, GNorm = 1.8546, lr_0 = 2.4388e-04
Loss = 3.6342e-01, PNorm = 61.1926, GNorm = 1.2376, lr_0 = 2.4371e-04
Loss = 3.9078e-01, PNorm = 61.1962, GNorm = 1.6905, lr_0 = 2.4354e-04
Loss = 3.9045e-01, PNorm = 61.2008, GNorm = 1.2547, lr_0 = 2.4338e-04
Loss = 3.6961e-01, PNorm = 61.2050, GNorm = 1.5308, lr_0 = 2.4321e-04
Loss = 3.9193e-01, PNorm = 61.2054, GNorm = 1.2321, lr_0 = 2.4304e-04
Loss = 3.8077e-01, PNorm = 61.2099, GNorm = 1.5960, lr_0 = 2.4288e-04
Loss = 3.6418e-01, PNorm = 61.2150, GNorm = 1.9415, lr_0 = 2.4271e-04
Loss = 3.3234e-01, PNorm = 61.2183, GNorm = 1.9061, lr_0 = 2.4254e-04
Loss = 3.5442e-01, PNorm = 61.2221, GNorm = 1.7219, lr_0 = 2.4238e-04
Loss = 3.7937e-01, PNorm = 61.2242, GNorm = 2.0694, lr_0 = 2.4221e-04
Loss = 3.5655e-01, PNorm = 61.2263, GNorm = 1.2870, lr_0 = 2.4205e-04
Loss = 3.3269e-01, PNorm = 61.2258, GNorm = 1.3839, lr_0 = 2.4188e-04
Loss = 3.9175e-01, PNorm = 61.2279, GNorm = 2.1358, lr_0 = 2.4171e-04
Loss = 3.4814e-01, PNorm = 61.2359, GNorm = 1.9219, lr_0 = 2.4155e-04
Loss = 3.5358e-01, PNorm = 61.2393, GNorm = 1.5929, lr_0 = 2.4138e-04
Loss = 3.6786e-01, PNorm = 61.2427, GNorm = 1.3662, lr_0 = 2.4122e-04
Loss = 3.4542e-01, PNorm = 61.2422, GNorm = 1.7815, lr_0 = 2.4105e-04
Loss = 4.2933e-01, PNorm = 61.2430, GNorm = 1.8730, lr_0 = 2.4089e-04
Loss = 3.1691e-01, PNorm = 61.2488, GNorm = 1.0877, lr_0 = 2.4072e-04
Loss = 3.9779e-01, PNorm = 61.2478, GNorm = 1.6445, lr_0 = 2.4056e-04
Loss = 3.5972e-01, PNorm = 61.2484, GNorm = 2.0549, lr_0 = 2.4039e-04
Loss = 3.8221e-01, PNorm = 61.2579, GNorm = 1.9096, lr_0 = 2.4023e-04
Loss = 3.4075e-01, PNorm = 61.2627, GNorm = 1.7064, lr_0 = 2.4006e-04
Loss = 4.7875e-01, PNorm = 61.2635, GNorm = 1.3548, lr_0 = 2.3990e-04
Loss = 3.7652e-01, PNorm = 61.2664, GNorm = 1.6238, lr_0 = 2.3974e-04
Loss = 3.9270e-01, PNorm = 61.2696, GNorm = 1.2788, lr_0 = 2.3957e-04
Loss = 3.5547e-01, PNorm = 61.2745, GNorm = 1.4313, lr_0 = 2.3941e-04
Loss = 3.9090e-01, PNorm = 61.2755, GNorm = 1.4212, lr_0 = 2.3924e-04
Loss = 3.8316e-01, PNorm = 61.2791, GNorm = 1.3921, lr_0 = 2.3908e-04
Loss = 3.6349e-01, PNorm = 61.2884, GNorm = 1.4735, lr_0 = 2.3892e-04
Loss = 3.6360e-01, PNorm = 61.2897, GNorm = 1.3622, lr_0 = 2.3875e-04
Loss = 3.1607e-01, PNorm = 61.2916, GNorm = 1.6986, lr_0 = 2.3859e-04
Loss = 3.7787e-01, PNorm = 61.2921, GNorm = 1.5050, lr_0 = 2.3842e-04
Loss = 3.8593e-01, PNorm = 61.2971, GNorm = 1.6670, lr_0 = 2.3826e-04
Loss = 3.0997e-01, PNorm = 61.2981, GNorm = 1.5197, lr_0 = 2.3810e-04
Loss = 3.7552e-01, PNorm = 61.2990, GNorm = 1.4660, lr_0 = 2.3794e-04
Loss = 3.5506e-01, PNorm = 61.3010, GNorm = 1.6191, lr_0 = 2.3777e-04
Loss = 4.1114e-01, PNorm = 61.3009, GNorm = 1.4322, lr_0 = 2.3761e-04
Loss = 3.9324e-01, PNorm = 61.3033, GNorm = 1.1621, lr_0 = 2.3745e-04
Loss = 3.6884e-01, PNorm = 61.3066, GNorm = 1.0647, lr_0 = 2.3728e-04
Loss = 4.0278e-01, PNorm = 61.3125, GNorm = 2.3315, lr_0 = 2.3712e-04
Loss = 3.5463e-01, PNorm = 61.3175, GNorm = 1.3470, lr_0 = 2.3696e-04
Loss = 3.8910e-01, PNorm = 61.3205, GNorm = 1.7706, lr_0 = 2.3680e-04
Loss = 3.6661e-01, PNorm = 61.3234, GNorm = 1.4291, lr_0 = 2.3663e-04
Loss = 3.6056e-01, PNorm = 61.3254, GNorm = 1.2801, lr_0 = 2.3647e-04
Loss = 3.3189e-01, PNorm = 61.3261, GNorm = 1.4847, lr_0 = 2.3631e-04
Loss = 3.7307e-01, PNorm = 61.3279, GNorm = 1.2027, lr_0 = 2.3615e-04
Loss = 3.6107e-01, PNorm = 61.3305, GNorm = 1.4284, lr_0 = 2.3599e-04
Loss = 3.5121e-01, PNorm = 61.3314, GNorm = 1.3202, lr_0 = 2.3582e-04
Loss = 3.4800e-01, PNorm = 61.3322, GNorm = 1.1209, lr_0 = 2.3566e-04
Loss = 3.6940e-01, PNorm = 61.3370, GNorm = 1.4553, lr_0 = 2.3550e-04
Loss = 3.3730e-01, PNorm = 61.3381, GNorm = 1.8070, lr_0 = 2.3534e-04
Loss = 3.8650e-01, PNorm = 61.3390, GNorm = 0.9589, lr_0 = 2.3518e-04
Loss = 3.7020e-01, PNorm = 61.3403, GNorm = 1.4824, lr_0 = 2.3502e-04
Loss = 3.3459e-01, PNorm = 61.3415, GNorm = 1.9216, lr_0 = 2.3486e-04
Loss = 3.7885e-01, PNorm = 61.3454, GNorm = 1.3460, lr_0 = 2.3470e-04
Loss = 3.7263e-01, PNorm = 61.3485, GNorm = 1.9693, lr_0 = 2.3454e-04
Loss = 3.6914e-01, PNorm = 61.3474, GNorm = 1.3819, lr_0 = 2.3437e-04
Loss = 4.5608e-01, PNorm = 61.3479, GNorm = 1.6215, lr_0 = 2.3421e-04
Loss = 4.0381e-01, PNorm = 61.3504, GNorm = 1.2115, lr_0 = 2.3405e-04
Loss = 3.9857e-01, PNorm = 61.3538, GNorm = 1.8102, lr_0 = 2.3389e-04
Loss = 4.0415e-01, PNorm = 61.3540, GNorm = 1.5473, lr_0 = 2.3373e-04
Loss = 3.6439e-01, PNorm = 61.3576, GNorm = 1.4326, lr_0 = 2.3357e-04
Loss = 3.5960e-01, PNorm = 61.3615, GNorm = 1.3393, lr_0 = 2.3341e-04
Loss = 3.8092e-01, PNorm = 61.3636, GNorm = 1.8007, lr_0 = 2.3325e-04
Loss = 3.7758e-01, PNorm = 61.3677, GNorm = 0.9661, lr_0 = 2.3309e-04
Loss = 3.6512e-01, PNorm = 61.3681, GNorm = 1.4701, lr_0 = 2.3293e-04
Loss = 3.2629e-01, PNorm = 61.3718, GNorm = 1.6223, lr_0 = 2.3277e-04
Loss = 3.2650e-01, PNorm = 61.3758, GNorm = 1.6174, lr_0 = 2.3261e-04
Loss = 3.5489e-01, PNorm = 61.3769, GNorm = 1.3841, lr_0 = 2.3246e-04
Loss = 3.1992e-01, PNorm = 61.3789, GNorm = 1.1041, lr_0 = 2.3230e-04
Loss = 3.3446e-01, PNorm = 61.3822, GNorm = 1.2516, lr_0 = 2.3214e-04
Loss = 3.6273e-01, PNorm = 61.3852, GNorm = 1.9331, lr_0 = 2.3198e-04
Loss = 3.5286e-01, PNorm = 61.3909, GNorm = 1.8999, lr_0 = 2.3182e-04
Loss = 3.0997e-01, PNorm = 61.3958, GNorm = 1.3668, lr_0 = 2.3166e-04
Loss = 4.1290e-01, PNorm = 61.3984, GNorm = 1.8047, lr_0 = 2.3150e-04
Loss = 4.0737e-01, PNorm = 61.4001, GNorm = 1.9045, lr_0 = 2.3134e-04
Loss = 3.5686e-01, PNorm = 61.4034, GNorm = 1.3174, lr_0 = 2.3118e-04
Loss = 3.9549e-01, PNorm = 61.4054, GNorm = 2.1025, lr_0 = 2.3103e-04
Loss = 2.8419e-01, PNorm = 61.4118, GNorm = 1.4660, lr_0 = 2.3087e-04
Loss = 3.5995e-01, PNorm = 61.4139, GNorm = 1.4156, lr_0 = 2.3071e-04
Loss = 3.0468e-01, PNorm = 61.4172, GNorm = 1.0716, lr_0 = 2.3055e-04
Loss = 3.7696e-01, PNorm = 61.4214, GNorm = 1.6892, lr_0 = 2.3039e-04
Loss = 3.8756e-01, PNorm = 61.4220, GNorm = 1.3288, lr_0 = 2.3024e-04
Loss = 3.6682e-01, PNorm = 61.4250, GNorm = 1.3678, lr_0 = 2.3008e-04
Loss = 4.1529e-01, PNorm = 61.4267, GNorm = 1.0821, lr_0 = 2.2992e-04
Loss = 3.6810e-01, PNorm = 61.4299, GNorm = 1.5211, lr_0 = 2.2976e-04
Loss = 3.6401e-01, PNorm = 61.4322, GNorm = 1.7868, lr_0 = 2.2961e-04
Loss = 3.8497e-01, PNorm = 61.4332, GNorm = 1.7240, lr_0 = 2.2945e-04
Loss = 3.7967e-01, PNorm = 61.4367, GNorm = 1.2721, lr_0 = 2.2929e-04
Loss = 3.4545e-01, PNorm = 61.4397, GNorm = 1.2650, lr_0 = 2.2913e-04
Loss = 3.5258e-01, PNorm = 61.4399, GNorm = 1.4608, lr_0 = 2.2898e-04
Loss = 4.5244e-01, PNorm = 61.4405, GNorm = 1.4814, lr_0 = 2.2882e-04
Loss = 3.2326e-01, PNorm = 61.4452, GNorm = 1.4287, lr_0 = 2.2866e-04
Loss = 3.1803e-01, PNorm = 61.4481, GNorm = 1.3745, lr_0 = 2.2851e-04
Loss = 3.7900e-01, PNorm = 61.4512, GNorm = 1.1584, lr_0 = 2.2835e-04
Loss = 3.4299e-01, PNorm = 61.4511, GNorm = 2.0405, lr_0 = 2.2819e-04
Loss = 3.4986e-01, PNorm = 61.4513, GNorm = 1.6061, lr_0 = 2.2804e-04
Loss = 3.6171e-01, PNorm = 61.4519, GNorm = 1.4869, lr_0 = 2.2788e-04
Loss = 3.5356e-01, PNorm = 61.4550, GNorm = 1.0609, lr_0 = 2.2773e-04
Loss = 3.9508e-01, PNorm = 61.4559, GNorm = 2.4629, lr_0 = 2.2757e-04
Validation mae = 0.112025
Epoch 20
Loss = 3.4997e-01, PNorm = 61.4596, GNorm = 1.7499, lr_0 = 2.2741e-04
Loss = 3.4328e-01, PNorm = 61.4616, GNorm = 1.2880, lr_0 = 2.2726e-04
Loss = 3.3558e-01, PNorm = 61.4633, GNorm = 1.1944, lr_0 = 2.2710e-04
Loss = 4.1475e-01, PNorm = 61.4651, GNorm = 1.0595, lr_0 = 2.2695e-04
Loss = 3.2444e-01, PNorm = 61.4651, GNorm = 1.2577, lr_0 = 2.2679e-04
Loss = 3.6102e-01, PNorm = 61.4670, GNorm = 1.4285, lr_0 = 2.2664e-04
Loss = 3.5091e-01, PNorm = 61.4699, GNorm = 1.5878, lr_0 = 2.2648e-04
Loss = 3.4378e-01, PNorm = 61.4730, GNorm = 1.1625, lr_0 = 2.2632e-04
Loss = 3.6847e-01, PNorm = 61.4756, GNorm = 1.0938, lr_0 = 2.2617e-04
Loss = 3.5149e-01, PNorm = 61.4778, GNorm = 1.7020, lr_0 = 2.2601e-04
Loss = 3.5002e-01, PNorm = 61.4833, GNorm = 1.4381, lr_0 = 2.2586e-04
Loss = 3.6520e-01, PNorm = 61.4876, GNorm = 1.2164, lr_0 = 2.2571e-04
Loss = 3.8289e-01, PNorm = 61.4864, GNorm = 1.4981, lr_0 = 2.2555e-04
Loss = 3.8756e-01, PNorm = 61.4875, GNorm = 1.3084, lr_0 = 2.2540e-04
Loss = 3.9737e-01, PNorm = 61.4946, GNorm = 1.6836, lr_0 = 2.2524e-04
Loss = 3.4795e-01, PNorm = 61.4994, GNorm = 1.4545, lr_0 = 2.2509e-04
Loss = 3.7545e-01, PNorm = 61.5020, GNorm = 1.1307, lr_0 = 2.2493e-04
Loss = 3.8362e-01, PNorm = 61.5023, GNorm = 1.1653, lr_0 = 2.2478e-04
Loss = 3.6060e-01, PNorm = 61.5048, GNorm = 1.9186, lr_0 = 2.2463e-04
Loss = 3.3633e-01, PNorm = 61.5080, GNorm = 1.6430, lr_0 = 2.2447e-04
Loss = 3.9102e-01, PNorm = 61.5089, GNorm = 1.5235, lr_0 = 2.2432e-04
Loss = 3.7854e-01, PNorm = 61.5113, GNorm = 1.2701, lr_0 = 2.2416e-04
Loss = 3.2119e-01, PNorm = 61.5159, GNorm = 1.3986, lr_0 = 2.2401e-04
Loss = 3.6026e-01, PNorm = 61.5194, GNorm = 1.7307, lr_0 = 2.2386e-04
Loss = 4.3259e-01, PNorm = 61.5218, GNorm = 1.6021, lr_0 = 2.2370e-04
Loss = 3.3647e-01, PNorm = 61.5247, GNorm = 1.1671, lr_0 = 2.2355e-04
Loss = 4.0036e-01, PNorm = 61.5237, GNorm = 2.1011, lr_0 = 2.2340e-04
Loss = 3.3476e-01, PNorm = 61.5259, GNorm = 1.1149, lr_0 = 2.2324e-04
Loss = 3.6702e-01, PNorm = 61.5313, GNorm = 1.5583, lr_0 = 2.2309e-04
Loss = 3.7846e-01, PNorm = 61.5341, GNorm = 1.1926, lr_0 = 2.2294e-04
Loss = 3.4183e-01, PNorm = 61.5366, GNorm = 1.6096, lr_0 = 2.2279e-04
Loss = 3.8736e-01, PNorm = 61.5389, GNorm = 1.2076, lr_0 = 2.2263e-04
Loss = 3.9441e-01, PNorm = 61.5429, GNorm = 1.6071, lr_0 = 2.2248e-04
Loss = 3.0038e-01, PNorm = 61.5463, GNorm = 1.2191, lr_0 = 2.2233e-04
Loss = 3.3906e-01, PNorm = 61.5484, GNorm = 1.6988, lr_0 = 2.2218e-04
Loss = 4.8352e-01, PNorm = 61.5513, GNorm = 1.3647, lr_0 = 2.2202e-04
Loss = 3.6661e-01, PNorm = 61.5542, GNorm = 1.6853, lr_0 = 2.2187e-04
Loss = 3.5378e-01, PNorm = 61.5581, GNorm = 1.5113, lr_0 = 2.2172e-04
Loss = 3.5023e-01, PNorm = 61.5599, GNorm = 1.1931, lr_0 = 2.2157e-04
Loss = 3.3766e-01, PNorm = 61.5627, GNorm = 2.0364, lr_0 = 2.2142e-04
Loss = 3.8700e-01, PNorm = 61.5599, GNorm = 1.5427, lr_0 = 2.2126e-04
Loss = 3.7073e-01, PNorm = 61.5619, GNorm = 1.0813, lr_0 = 2.2111e-04
Loss = 3.8246e-01, PNorm = 61.5640, GNorm = 1.7969, lr_0 = 2.2096e-04
Loss = 4.0780e-01, PNorm = 61.5650, GNorm = 1.6338, lr_0 = 2.2081e-04
Loss = 3.6502e-01, PNorm = 61.5643, GNorm = 1.4950, lr_0 = 2.2066e-04
Loss = 3.6410e-01, PNorm = 61.5661, GNorm = 1.6197, lr_0 = 2.2051e-04
Loss = 4.0449e-01, PNorm = 61.5694, GNorm = 1.7149, lr_0 = 2.2036e-04
Loss = 3.6427e-01, PNorm = 61.5707, GNorm = 1.7615, lr_0 = 2.2021e-04
Loss = 3.6888e-01, PNorm = 61.5719, GNorm = 1.4830, lr_0 = 2.2005e-04
Loss = 3.6757e-01, PNorm = 61.5737, GNorm = 1.7385, lr_0 = 2.1990e-04
Loss = 3.7608e-01, PNorm = 61.5745, GNorm = 1.6933, lr_0 = 2.1975e-04
Loss = 3.9207e-01, PNorm = 61.5777, GNorm = 1.3114, lr_0 = 2.1960e-04
Loss = 3.7792e-01, PNorm = 61.5815, GNorm = 1.2848, lr_0 = 2.1945e-04
Loss = 3.6344e-01, PNorm = 61.5828, GNorm = 1.9995, lr_0 = 2.1930e-04
Loss = 3.5511e-01, PNorm = 61.5845, GNorm = 1.9220, lr_0 = 2.1915e-04
Loss = 3.5987e-01, PNorm = 61.5898, GNorm = 1.5572, lr_0 = 2.1900e-04
Loss = 3.6392e-01, PNorm = 61.5910, GNorm = 1.3433, lr_0 = 2.1885e-04
Loss = 3.4276e-01, PNorm = 61.5945, GNorm = 1.2452, lr_0 = 2.1870e-04
Loss = 3.5573e-01, PNorm = 61.5949, GNorm = 1.2211, lr_0 = 2.1855e-04
Loss = 3.3089e-01, PNorm = 61.5938, GNorm = 1.4173, lr_0 = 2.1840e-04
Loss = 3.4095e-01, PNorm = 61.5981, GNorm = 1.6311, lr_0 = 2.1825e-04
Loss = 3.1123e-01, PNorm = 61.6056, GNorm = 1.0685, lr_0 = 2.1810e-04
Loss = 3.8284e-01, PNorm = 61.6072, GNorm = 1.2297, lr_0 = 2.1795e-04
Loss = 3.0705e-01, PNorm = 61.6089, GNorm = 1.3623, lr_0 = 2.1780e-04
Loss = 3.7594e-01, PNorm = 61.6097, GNorm = 1.4656, lr_0 = 2.1765e-04
Loss = 3.4684e-01, PNorm = 61.6098, GNorm = 1.0903, lr_0 = 2.1751e-04
Loss = 3.7563e-01, PNorm = 61.6122, GNorm = 1.4641, lr_0 = 2.1736e-04
Loss = 3.3398e-01, PNorm = 61.6165, GNorm = 1.1234, lr_0 = 2.1721e-04
Loss = 3.7301e-01, PNorm = 61.6197, GNorm = 1.6538, lr_0 = 2.1706e-04
Loss = 3.9798e-01, PNorm = 61.6237, GNorm = 1.4253, lr_0 = 2.1691e-04
Loss = 3.4875e-01, PNorm = 61.6240, GNorm = 1.8848, lr_0 = 2.1676e-04
Loss = 3.4288e-01, PNorm = 61.6268, GNorm = 1.4379, lr_0 = 2.1661e-04
Loss = 3.3814e-01, PNorm = 61.6301, GNorm = 1.3200, lr_0 = 2.1646e-04
Loss = 4.0757e-01, PNorm = 61.6310, GNorm = 1.6743, lr_0 = 2.1632e-04
Loss = 3.9772e-01, PNorm = 61.6339, GNorm = 1.6151, lr_0 = 2.1617e-04
Loss = 3.2922e-01, PNorm = 61.6371, GNorm = 1.6080, lr_0 = 2.1602e-04
Loss = 3.3496e-01, PNorm = 61.6379, GNorm = 1.5404, lr_0 = 2.1587e-04
Loss = 3.7049e-01, PNorm = 61.6401, GNorm = 1.4760, lr_0 = 2.1572e-04
Loss = 3.5633e-01, PNorm = 61.6473, GNorm = 1.2573, lr_0 = 2.1558e-04
Loss = 3.9872e-01, PNorm = 61.6501, GNorm = 1.6927, lr_0 = 2.1543e-04
Loss = 3.5461e-01, PNorm = 61.6540, GNorm = 1.0569, lr_0 = 2.1528e-04
Loss = 3.9299e-01, PNorm = 61.6569, GNorm = 1.6651, lr_0 = 2.1513e-04
Loss = 3.6388e-01, PNorm = 61.6607, GNorm = 1.3564, lr_0 = 2.1499e-04
Loss = 3.8090e-01, PNorm = 61.6632, GNorm = 1.2580, lr_0 = 2.1484e-04
Loss = 3.7862e-01, PNorm = 61.6629, GNorm = 1.7560, lr_0 = 2.1469e-04
Loss = 3.6823e-01, PNorm = 61.6660, GNorm = 1.5393, lr_0 = 2.1454e-04
Loss = 3.5157e-01, PNorm = 61.6705, GNorm = 1.1432, lr_0 = 2.1440e-04
Loss = 3.4658e-01, PNorm = 61.6716, GNorm = 1.4893, lr_0 = 2.1425e-04
Loss = 3.3719e-01, PNorm = 61.6755, GNorm = 1.5933, lr_0 = 2.1410e-04
Loss = 3.7995e-01, PNorm = 61.6782, GNorm = 1.3553, lr_0 = 2.1396e-04
Loss = 3.5374e-01, PNorm = 61.6780, GNorm = 1.3570, lr_0 = 2.1381e-04
Loss = 3.9459e-01, PNorm = 61.6772, GNorm = 2.2773, lr_0 = 2.1366e-04
Loss = 3.2310e-01, PNorm = 61.6803, GNorm = 1.1410, lr_0 = 2.1352e-04
Loss = 3.8149e-01, PNorm = 61.6807, GNorm = 1.4758, lr_0 = 2.1337e-04
Loss = 4.1141e-01, PNorm = 61.6815, GNorm = 1.4635, lr_0 = 2.1323e-04
Loss = 3.5324e-01, PNorm = 61.6863, GNorm = 1.4447, lr_0 = 2.1308e-04
Loss = 3.7590e-01, PNorm = 61.6879, GNorm = 1.7009, lr_0 = 2.1293e-04
Loss = 4.0419e-01, PNorm = 61.6877, GNorm = 1.4241, lr_0 = 2.1279e-04
Loss = 3.5268e-01, PNorm = 61.6893, GNorm = 1.8729, lr_0 = 2.1264e-04
Loss = 3.7973e-01, PNorm = 61.6887, GNorm = 1.0140, lr_0 = 2.1250e-04
Loss = 3.5960e-01, PNorm = 61.6919, GNorm = 1.0737, lr_0 = 2.1235e-04
Loss = 3.2230e-01, PNorm = 61.6945, GNorm = 1.4615, lr_0 = 2.1221e-04
Loss = 3.6876e-01, PNorm = 61.6953, GNorm = 1.0311, lr_0 = 2.1206e-04
Loss = 3.7854e-01, PNorm = 61.6978, GNorm = 1.1000, lr_0 = 2.1191e-04
Loss = 3.6736e-01, PNorm = 61.7041, GNorm = 1.9904, lr_0 = 2.1177e-04
Loss = 3.7010e-01, PNorm = 61.7064, GNorm = 1.7418, lr_0 = 2.1162e-04
Loss = 4.0677e-01, PNorm = 61.7104, GNorm = 1.2422, lr_0 = 2.1148e-04
Loss = 3.3789e-01, PNorm = 61.7151, GNorm = 1.3686, lr_0 = 2.1133e-04
Loss = 3.5637e-01, PNorm = 61.7161, GNorm = 1.7741, lr_0 = 2.1119e-04
Loss = 3.6882e-01, PNorm = 61.7188, GNorm = 2.1025, lr_0 = 2.1104e-04
Loss = 3.4489e-01, PNorm = 61.7199, GNorm = 1.3937, lr_0 = 2.1090e-04
Loss = 3.4622e-01, PNorm = 61.7194, GNorm = 1.2265, lr_0 = 2.1076e-04
Loss = 4.0303e-01, PNorm = 61.7188, GNorm = 1.2995, lr_0 = 2.1061e-04
Loss = 3.3584e-01, PNorm = 61.7206, GNorm = 1.0887, lr_0 = 2.1047e-04
Loss = 4.0726e-01, PNorm = 61.7230, GNorm = 1.4960, lr_0 = 2.1032e-04
Loss = 4.0517e-01, PNorm = 61.7235, GNorm = 2.6129, lr_0 = 2.1018e-04
Loss = 3.5234e-01, PNorm = 61.7241, GNorm = 0.9969, lr_0 = 2.1003e-04
Loss = 4.1799e-01, PNorm = 61.7268, GNorm = 1.6290, lr_0 = 2.0989e-04
Loss = 3.7721e-01, PNorm = 61.7303, GNorm = 2.1128, lr_0 = 2.0975e-04
Loss = 3.5809e-01, PNorm = 61.7300, GNorm = 1.4111, lr_0 = 2.0960e-04
Validation mae = 0.114342
Epoch 21
Loss = 3.6983e-01, PNorm = 61.7340, GNorm = 2.0126, lr_0 = 2.0946e-04
Loss = 3.4400e-01, PNorm = 61.7349, GNorm = 0.9335, lr_0 = 2.0932e-04
Loss = 3.3486e-01, PNorm = 61.7392, GNorm = 1.4790, lr_0 = 2.0917e-04
Loss = 4.1472e-01, PNorm = 61.7428, GNorm = 1.5591, lr_0 = 2.0903e-04
Loss = 3.3357e-01, PNorm = 61.7460, GNorm = 2.1236, lr_0 = 2.0889e-04
Loss = 3.5131e-01, PNorm = 61.7499, GNorm = 1.5128, lr_0 = 2.0874e-04
Loss = 3.2059e-01, PNorm = 61.7486, GNorm = 0.9370, lr_0 = 2.0860e-04
Loss = 3.5238e-01, PNorm = 61.7504, GNorm = 1.7979, lr_0 = 2.0846e-04
Loss = 3.6632e-01, PNorm = 61.7505, GNorm = 1.4416, lr_0 = 2.0831e-04
Loss = 3.5118e-01, PNorm = 61.7549, GNorm = 1.3892, lr_0 = 2.0817e-04
Loss = 3.1801e-01, PNorm = 61.7567, GNorm = 1.5936, lr_0 = 2.0803e-04
Loss = 3.5359e-01, PNorm = 61.7582, GNorm = 1.5109, lr_0 = 2.0789e-04
Loss = 3.6347e-01, PNorm = 61.7628, GNorm = 1.3129, lr_0 = 2.0774e-04
Loss = 3.3748e-01, PNorm = 61.7650, GNorm = 1.5753, lr_0 = 2.0760e-04
Loss = 4.0050e-01, PNorm = 61.7650, GNorm = 1.4788, lr_0 = 2.0746e-04
Loss = 3.2517e-01, PNorm = 61.7653, GNorm = 1.3494, lr_0 = 2.0732e-04
Loss = 3.0636e-01, PNorm = 61.7662, GNorm = 1.1764, lr_0 = 2.0718e-04
Loss = 3.7043e-01, PNorm = 61.7700, GNorm = 1.5912, lr_0 = 2.0703e-04
Loss = 3.5031e-01, PNorm = 61.7756, GNorm = 1.4648, lr_0 = 2.0689e-04
Loss = 3.4151e-01, PNorm = 61.7781, GNorm = 1.7531, lr_0 = 2.0675e-04
Loss = 3.8621e-01, PNorm = 61.7796, GNorm = 1.4243, lr_0 = 2.0661e-04
Loss = 3.6733e-01, PNorm = 61.7815, GNorm = 2.1286, lr_0 = 2.0647e-04
Loss = 3.3617e-01, PNorm = 61.7827, GNorm = 1.5333, lr_0 = 2.0633e-04
Loss = 3.5605e-01, PNorm = 61.7817, GNorm = 1.5104, lr_0 = 2.0618e-04
Loss = 3.8514e-01, PNorm = 61.7849, GNorm = 1.5657, lr_0 = 2.0604e-04
Loss = 3.7123e-01, PNorm = 61.7875, GNorm = 1.3424, lr_0 = 2.0590e-04
Loss = 3.0113e-01, PNorm = 61.7904, GNorm = 1.6771, lr_0 = 2.0576e-04
Loss = 3.5648e-01, PNorm = 61.7949, GNorm = 1.2523, lr_0 = 2.0562e-04
Loss = 3.4142e-01, PNorm = 61.7961, GNorm = 0.9846, lr_0 = 2.0548e-04
Loss = 3.8406e-01, PNorm = 61.7965, GNorm = 1.8871, lr_0 = 2.0534e-04
Loss = 3.7417e-01, PNorm = 61.7964, GNorm = 1.4721, lr_0 = 2.0520e-04
Loss = 3.4177e-01, PNorm = 61.7953, GNorm = 1.3875, lr_0 = 2.0506e-04
Loss = 4.0326e-01, PNorm = 61.7990, GNorm = 2.0471, lr_0 = 2.0492e-04
Loss = 3.7088e-01, PNorm = 61.8042, GNorm = 1.7827, lr_0 = 2.0478e-04
Loss = 3.7175e-01, PNorm = 61.8097, GNorm = 1.3946, lr_0 = 2.0464e-04
Loss = 3.1400e-01, PNorm = 61.8131, GNorm = 1.2709, lr_0 = 2.0450e-04
Loss = 4.0397e-01, PNorm = 61.8127, GNorm = 1.2620, lr_0 = 2.0436e-04
Loss = 3.6633e-01, PNorm = 61.8161, GNorm = 1.0614, lr_0 = 2.0422e-04
Loss = 3.2859e-01, PNorm = 61.8188, GNorm = 1.1637, lr_0 = 2.0408e-04
Loss = 3.9921e-01, PNorm = 61.8246, GNorm = 1.4829, lr_0 = 2.0394e-04
Loss = 3.4718e-01, PNorm = 61.8274, GNorm = 1.5518, lr_0 = 2.0380e-04
Loss = 3.6070e-01, PNorm = 61.8248, GNorm = 1.6548, lr_0 = 2.0366e-04
Loss = 3.5433e-01, PNorm = 61.8244, GNorm = 1.6185, lr_0 = 2.0352e-04
Loss = 3.3099e-01, PNorm = 61.8297, GNorm = 1.2510, lr_0 = 2.0338e-04
Loss = 3.6926e-01, PNorm = 61.8323, GNorm = 1.3527, lr_0 = 2.0324e-04
Loss = 3.7590e-01, PNorm = 61.8338, GNorm = 1.7357, lr_0 = 2.0310e-04
Loss = 3.7946e-01, PNorm = 61.8352, GNorm = 2.3381, lr_0 = 2.0296e-04
Loss = 3.7486e-01, PNorm = 61.8363, GNorm = 1.4273, lr_0 = 2.0282e-04
Loss = 3.5250e-01, PNorm = 61.8371, GNorm = 1.4789, lr_0 = 2.0268e-04
Loss = 3.3064e-01, PNorm = 61.8380, GNorm = 1.0901, lr_0 = 2.0254e-04
Loss = 3.4496e-01, PNorm = 61.8360, GNorm = 1.3064, lr_0 = 2.0240e-04
Loss = 3.4438e-01, PNorm = 61.8351, GNorm = 1.0809, lr_0 = 2.0227e-04
Loss = 3.2496e-01, PNorm = 61.8359, GNorm = 1.5056, lr_0 = 2.0213e-04
Loss = 4.2027e-01, PNorm = 61.8364, GNorm = 1.7418, lr_0 = 2.0199e-04
Loss = 3.2059e-01, PNorm = 61.8358, GNorm = 1.4876, lr_0 = 2.0185e-04
Loss = 3.2232e-01, PNorm = 61.8378, GNorm = 1.4181, lr_0 = 2.0171e-04
Loss = 3.7291e-01, PNorm = 61.8404, GNorm = 1.4319, lr_0 = 2.0157e-04
Loss = 3.2689e-01, PNorm = 61.8441, GNorm = 1.2861, lr_0 = 2.0144e-04
Loss = 3.4306e-01, PNorm = 61.8465, GNorm = 1.4359, lr_0 = 2.0130e-04
Loss = 3.7615e-01, PNorm = 61.8453, GNorm = 2.1136, lr_0 = 2.0116e-04
Loss = 3.6428e-01, PNorm = 61.8497, GNorm = 1.7410, lr_0 = 2.0102e-04
Loss = 4.0518e-01, PNorm = 61.8529, GNorm = 1.6997, lr_0 = 2.0088e-04
Loss = 4.0361e-01, PNorm = 61.8532, GNorm = 1.8026, lr_0 = 2.0075e-04
Loss = 3.6173e-01, PNorm = 61.8589, GNorm = 1.8832, lr_0 = 2.0061e-04
Loss = 3.7627e-01, PNorm = 61.8591, GNorm = 1.9196, lr_0 = 2.0047e-04
Loss = 3.5310e-01, PNorm = 61.8606, GNorm = 1.3675, lr_0 = 2.0033e-04
Loss = 4.1725e-01, PNorm = 61.8607, GNorm = 1.3719, lr_0 = 2.0020e-04
Loss = 3.8687e-01, PNorm = 61.8646, GNorm = 1.1709, lr_0 = 2.0006e-04
Loss = 3.8376e-01, PNorm = 61.8709, GNorm = 1.4705, lr_0 = 1.9992e-04
Loss = 3.9431e-01, PNorm = 61.8698, GNorm = 0.9432, lr_0 = 1.9979e-04
Loss = 3.5337e-01, PNorm = 61.8727, GNorm = 1.7707, lr_0 = 1.9965e-04
Loss = 3.2750e-01, PNorm = 61.8797, GNorm = 2.0080, lr_0 = 1.9951e-04
Loss = 3.4736e-01, PNorm = 61.8799, GNorm = 1.1186, lr_0 = 1.9938e-04
Loss = 3.9247e-01, PNorm = 61.8827, GNorm = 2.0597, lr_0 = 1.9924e-04
Loss = 3.5684e-01, PNorm = 61.8890, GNorm = 1.7341, lr_0 = 1.9910e-04
Loss = 3.7000e-01, PNorm = 61.8881, GNorm = 1.1672, lr_0 = 1.9897e-04
Loss = 3.7468e-01, PNorm = 61.8876, GNorm = 1.7099, lr_0 = 1.9883e-04
Loss = 4.1830e-01, PNorm = 61.8925, GNorm = 1.3399, lr_0 = 1.9869e-04
Loss = 3.3490e-01, PNorm = 61.8969, GNorm = 1.7013, lr_0 = 1.9856e-04
Loss = 3.1661e-01, PNorm = 61.8969, GNorm = 1.2219, lr_0 = 1.9842e-04
Loss = 3.6561e-01, PNorm = 61.8996, GNorm = 1.4265, lr_0 = 1.9829e-04
Loss = 3.3868e-01, PNorm = 61.9013, GNorm = 1.1471, lr_0 = 1.9815e-04
Loss = 3.7573e-01, PNorm = 61.9032, GNorm = 3.8143, lr_0 = 1.9801e-04
Loss = 3.9359e-01, PNorm = 61.9027, GNorm = 1.2404, lr_0 = 1.9788e-04
Loss = 3.8483e-01, PNorm = 61.9039, GNorm = 1.3081, lr_0 = 1.9774e-04
Loss = 3.8061e-01, PNorm = 61.9042, GNorm = 1.5084, lr_0 = 1.9761e-04
Loss = 4.4097e-01, PNorm = 61.9055, GNorm = 2.2636, lr_0 = 1.9747e-04
Loss = 3.4361e-01, PNorm = 61.9115, GNorm = 1.8652, lr_0 = 1.9734e-04
Loss = 3.8211e-01, PNorm = 61.9120, GNorm = 1.6329, lr_0 = 1.9720e-04
Loss = 3.8133e-01, PNorm = 61.9144, GNorm = 1.8977, lr_0 = 1.9707e-04
Loss = 3.1482e-01, PNorm = 61.9194, GNorm = 1.4185, lr_0 = 1.9693e-04
Loss = 3.9721e-01, PNorm = 61.9216, GNorm = 1.2484, lr_0 = 1.9680e-04
Loss = 3.5231e-01, PNorm = 61.9229, GNorm = 1.7447, lr_0 = 1.9666e-04
Loss = 3.3760e-01, PNorm = 61.9234, GNorm = 1.4129, lr_0 = 1.9653e-04
Loss = 3.3700e-01, PNorm = 61.9254, GNorm = 2.0947, lr_0 = 1.9639e-04
Loss = 3.5952e-01, PNorm = 61.9258, GNorm = 1.3706, lr_0 = 1.9626e-04
Loss = 3.6424e-01, PNorm = 61.9318, GNorm = 1.6641, lr_0 = 1.9612e-04
Loss = 4.2465e-01, PNorm = 61.9350, GNorm = 2.1392, lr_0 = 1.9599e-04
Loss = 3.3872e-01, PNorm = 61.9393, GNorm = 1.5904, lr_0 = 1.9585e-04
Loss = 3.4359e-01, PNorm = 61.9398, GNorm = 1.2427, lr_0 = 1.9572e-04
Loss = 3.8285e-01, PNorm = 61.9370, GNorm = 2.3122, lr_0 = 1.9559e-04
Loss = 3.3495e-01, PNorm = 61.9388, GNorm = 1.0534, lr_0 = 1.9545e-04
Loss = 3.7922e-01, PNorm = 61.9446, GNorm = 1.5576, lr_0 = 1.9532e-04
Loss = 3.9656e-01, PNorm = 61.9482, GNorm = 1.3598, lr_0 = 1.9518e-04
Loss = 3.5269e-01, PNorm = 61.9504, GNorm = 1.8282, lr_0 = 1.9505e-04
Loss = 3.6472e-01, PNorm = 61.9485, GNorm = 1.3901, lr_0 = 1.9492e-04
Loss = 3.5238e-01, PNorm = 61.9457, GNorm = 1.1460, lr_0 = 1.9478e-04
Loss = 3.7036e-01, PNorm = 61.9485, GNorm = 1.3538, lr_0 = 1.9465e-04
Loss = 3.1644e-01, PNorm = 61.9514, GNorm = 1.0572, lr_0 = 1.9452e-04
Loss = 3.9907e-01, PNorm = 61.9497, GNorm = 2.1101, lr_0 = 1.9438e-04
Loss = 4.1782e-01, PNorm = 61.9543, GNorm = 1.9659, lr_0 = 1.9425e-04
Loss = 3.5896e-01, PNorm = 61.9582, GNorm = 2.2202, lr_0 = 1.9412e-04
Loss = 3.8299e-01, PNorm = 61.9611, GNorm = 1.6419, lr_0 = 1.9398e-04
Loss = 3.5383e-01, PNorm = 61.9650, GNorm = 1.5423, lr_0 = 1.9385e-04
Loss = 3.7087e-01, PNorm = 61.9678, GNorm = 1.9780, lr_0 = 1.9372e-04
Loss = 3.3388e-01, PNorm = 61.9718, GNorm = 1.1299, lr_0 = 1.9359e-04
Loss = 3.2117e-01, PNorm = 61.9739, GNorm = 1.4006, lr_0 = 1.9345e-04
Loss = 3.4804e-01, PNorm = 61.9761, GNorm = 1.9892, lr_0 = 1.9332e-04
Loss = 3.5274e-01, PNorm = 61.9773, GNorm = 2.0750, lr_0 = 1.9319e-04
Loss = 3.5098e-01, PNorm = 61.9790, GNorm = 1.2174, lr_0 = 1.9306e-04
Validation mae = 0.111446
Epoch 22
Loss = 3.3345e-01, PNorm = 61.9833, GNorm = 1.2275, lr_0 = 1.9292e-04
Loss = 3.8844e-01, PNorm = 61.9856, GNorm = 2.3048, lr_0 = 1.9279e-04
Loss = 3.3907e-01, PNorm = 61.9830, GNorm = 0.9793, lr_0 = 1.9266e-04
Loss = 3.2703e-01, PNorm = 61.9836, GNorm = 1.3130, lr_0 = 1.9253e-04
Loss = 3.4584e-01, PNorm = 61.9863, GNorm = 1.1706, lr_0 = 1.9240e-04
Loss = 3.3298e-01, PNorm = 61.9907, GNorm = 1.1483, lr_0 = 1.9226e-04
Loss = 3.4089e-01, PNorm = 61.9932, GNorm = 1.3301, lr_0 = 1.9213e-04
Loss = 3.4670e-01, PNorm = 61.9953, GNorm = 1.4332, lr_0 = 1.9200e-04
Loss = 3.8499e-01, PNorm = 61.9983, GNorm = 1.2129, lr_0 = 1.9187e-04
Loss = 3.3098e-01, PNorm = 61.9989, GNorm = 1.2773, lr_0 = 1.9174e-04
Loss = 3.6191e-01, PNorm = 62.0020, GNorm = 1.3747, lr_0 = 1.9161e-04
Loss = 3.1048e-01, PNorm = 62.0023, GNorm = 1.1105, lr_0 = 1.9148e-04
Loss = 3.7864e-01, PNorm = 62.0048, GNorm = 1.6584, lr_0 = 1.9134e-04
Loss = 3.3833e-01, PNorm = 62.0088, GNorm = 1.1898, lr_0 = 1.9121e-04
Loss = 3.7986e-01, PNorm = 62.0126, GNorm = 2.2834, lr_0 = 1.9108e-04
Loss = 3.5628e-01, PNorm = 62.0145, GNorm = 1.4586, lr_0 = 1.9095e-04
Loss = 3.4723e-01, PNorm = 62.0162, GNorm = 1.2901, lr_0 = 1.9082e-04
Loss = 3.4648e-01, PNorm = 62.0176, GNorm = 1.4777, lr_0 = 1.9069e-04
Loss = 3.8026e-01, PNorm = 62.0178, GNorm = 1.8885, lr_0 = 1.9056e-04
Loss = 4.0223e-01, PNorm = 62.0217, GNorm = 1.7653, lr_0 = 1.9043e-04
Loss = 3.2722e-01, PNorm = 62.0239, GNorm = 1.2172, lr_0 = 1.9030e-04
Loss = 3.3943e-01, PNorm = 62.0267, GNorm = 1.3608, lr_0 = 1.9017e-04
Loss = 3.6573e-01, PNorm = 62.0304, GNorm = 1.4195, lr_0 = 1.9004e-04
Loss = 3.8477e-01, PNorm = 62.0329, GNorm = 1.4848, lr_0 = 1.8991e-04
Loss = 3.7091e-01, PNorm = 62.0327, GNorm = 1.1795, lr_0 = 1.8978e-04
Loss = 3.4734e-01, PNorm = 62.0336, GNorm = 1.0596, lr_0 = 1.8965e-04
Loss = 3.5963e-01, PNorm = 62.0350, GNorm = 1.3642, lr_0 = 1.8952e-04
Loss = 3.7829e-01, PNorm = 62.0393, GNorm = 1.3297, lr_0 = 1.8939e-04
Loss = 3.0328e-01, PNorm = 62.0436, GNorm = 1.3043, lr_0 = 1.8926e-04
Loss = 3.6379e-01, PNorm = 62.0412, GNorm = 1.5262, lr_0 = 1.8913e-04
Loss = 3.6558e-01, PNorm = 62.0416, GNorm = 1.6516, lr_0 = 1.8900e-04
Loss = 3.5754e-01, PNorm = 62.0475, GNorm = 1.3746, lr_0 = 1.8887e-04
Loss = 4.0577e-01, PNorm = 62.0483, GNorm = 1.5198, lr_0 = 1.8874e-04
Loss = 3.3716e-01, PNorm = 62.0500, GNorm = 1.6080, lr_0 = 1.8861e-04
Loss = 3.7129e-01, PNorm = 62.0531, GNorm = 1.4044, lr_0 = 1.8848e-04
Loss = 3.7590e-01, PNorm = 62.0540, GNorm = 1.4026, lr_0 = 1.8835e-04
Loss = 3.5079e-01, PNorm = 62.0552, GNorm = 1.5068, lr_0 = 1.8822e-04
Loss = 3.8544e-01, PNorm = 62.0564, GNorm = 1.6046, lr_0 = 1.8809e-04
Loss = 3.6867e-01, PNorm = 62.0596, GNorm = 1.5439, lr_0 = 1.8797e-04
Loss = 3.5371e-01, PNorm = 62.0601, GNorm = 1.6831, lr_0 = 1.8784e-04
Loss = 3.6541e-01, PNorm = 62.0609, GNorm = 1.3160, lr_0 = 1.8771e-04
Loss = 4.0009e-01, PNorm = 62.0662, GNorm = 1.8607, lr_0 = 1.8758e-04
Loss = 4.1708e-01, PNorm = 62.0682, GNorm = 1.5031, lr_0 = 1.8745e-04
Loss = 3.4434e-01, PNorm = 62.0713, GNorm = 1.7679, lr_0 = 1.8732e-04
Loss = 3.6546e-01, PNorm = 62.0727, GNorm = 1.8049, lr_0 = 1.8719e-04
Loss = 3.9273e-01, PNorm = 62.0716, GNorm = 1.4433, lr_0 = 1.8707e-04
Loss = 3.1682e-01, PNorm = 62.0725, GNorm = 1.5248, lr_0 = 1.8694e-04
Loss = 3.3890e-01, PNorm = 62.0729, GNorm = 1.1963, lr_0 = 1.8681e-04
Loss = 3.9551e-01, PNorm = 62.0742, GNorm = 1.6121, lr_0 = 1.8668e-04
Loss = 4.0004e-01, PNorm = 62.0793, GNorm = 1.3584, lr_0 = 1.8655e-04
Loss = 3.3361e-01, PNorm = 62.0818, GNorm = 1.1811, lr_0 = 1.8643e-04
Loss = 3.5167e-01, PNorm = 62.0833, GNorm = 1.5425, lr_0 = 1.8630e-04
Loss = 4.2765e-01, PNorm = 62.0853, GNorm = 1.4155, lr_0 = 1.8617e-04
Loss = 3.4225e-01, PNorm = 62.0864, GNorm = 1.7385, lr_0 = 1.8604e-04
Loss = 3.7999e-01, PNorm = 62.0864, GNorm = 1.4500, lr_0 = 1.8592e-04
Loss = 3.7687e-01, PNorm = 62.0859, GNorm = 1.4058, lr_0 = 1.8579e-04
Loss = 3.1621e-01, PNorm = 62.0868, GNorm = 1.1581, lr_0 = 1.8566e-04
Loss = 3.1993e-01, PNorm = 62.0877, GNorm = 1.7314, lr_0 = 1.8553e-04
Loss = 3.0392e-01, PNorm = 62.0877, GNorm = 1.5856, lr_0 = 1.8541e-04
Loss = 3.4940e-01, PNorm = 62.0892, GNorm = 1.1154, lr_0 = 1.8528e-04
Loss = 3.6674e-01, PNorm = 62.0895, GNorm = 1.4856, lr_0 = 1.8515e-04
Loss = 3.6628e-01, PNorm = 62.0929, GNorm = 1.6846, lr_0 = 1.8503e-04
Loss = 3.3806e-01, PNorm = 62.0987, GNorm = 1.1214, lr_0 = 1.8490e-04
Loss = 3.6660e-01, PNorm = 62.0983, GNorm = 1.4468, lr_0 = 1.8477e-04
Loss = 3.2439e-01, PNorm = 62.0986, GNorm = 1.1066, lr_0 = 1.8465e-04
Loss = 3.9653e-01, PNorm = 62.1004, GNorm = 1.3757, lr_0 = 1.8452e-04
Loss = 3.0135e-01, PNorm = 62.1037, GNorm = 1.6776, lr_0 = 1.8439e-04
Loss = 3.0284e-01, PNorm = 62.1070, GNorm = 1.3228, lr_0 = 1.8427e-04
Loss = 3.5402e-01, PNorm = 62.1100, GNorm = 1.5304, lr_0 = 1.8414e-04
Loss = 3.5991e-01, PNorm = 62.1119, GNorm = 1.4008, lr_0 = 1.8401e-04
Loss = 3.4510e-01, PNorm = 62.1119, GNorm = 1.2158, lr_0 = 1.8389e-04
Loss = 4.4868e-01, PNorm = 62.1166, GNorm = 1.2229, lr_0 = 1.8376e-04
Loss = 3.6972e-01, PNorm = 62.1196, GNorm = 1.7792, lr_0 = 1.8364e-04
Loss = 3.1790e-01, PNorm = 62.1213, GNorm = 1.1343, lr_0 = 1.8351e-04
Loss = 3.2644e-01, PNorm = 62.1238, GNorm = 1.2292, lr_0 = 1.8338e-04
Loss = 3.8081e-01, PNorm = 62.1229, GNorm = 1.2346, lr_0 = 1.8326e-04
Loss = 3.5235e-01, PNorm = 62.1236, GNorm = 1.5786, lr_0 = 1.8313e-04
Loss = 4.0275e-01, PNorm = 62.1242, GNorm = 1.3432, lr_0 = 1.8301e-04
Loss = 3.3175e-01, PNorm = 62.1243, GNorm = 1.3153, lr_0 = 1.8288e-04
Loss = 3.6936e-01, PNorm = 62.1266, GNorm = 1.6369, lr_0 = 1.8276e-04
Loss = 3.3434e-01, PNorm = 62.1304, GNorm = 1.5969, lr_0 = 1.8263e-04
Loss = 3.7824e-01, PNorm = 62.1325, GNorm = 1.2135, lr_0 = 1.8251e-04
Loss = 3.5702e-01, PNorm = 62.1344, GNorm = 1.5025, lr_0 = 1.8238e-04
Loss = 3.4730e-01, PNorm = 62.1371, GNorm = 1.8645, lr_0 = 1.8226e-04
Loss = 3.4361e-01, PNorm = 62.1391, GNorm = 1.4439, lr_0 = 1.8213e-04
Loss = 3.6190e-01, PNorm = 62.1417, GNorm = 2.4302, lr_0 = 1.8201e-04
Loss = 3.4099e-01, PNorm = 62.1415, GNorm = 1.3092, lr_0 = 1.8188e-04
Loss = 3.3685e-01, PNorm = 62.1418, GNorm = 1.5709, lr_0 = 1.8176e-04
Loss = 3.3115e-01, PNorm = 62.1419, GNorm = 1.4345, lr_0 = 1.8163e-04
Loss = 4.0523e-01, PNorm = 62.1444, GNorm = 1.5217, lr_0 = 1.8151e-04
Loss = 3.6188e-01, PNorm = 62.1462, GNorm = 1.3268, lr_0 = 1.8138e-04
Loss = 3.6630e-01, PNorm = 62.1475, GNorm = 1.6532, lr_0 = 1.8126e-04
Loss = 3.7953e-01, PNorm = 62.1473, GNorm = 1.5767, lr_0 = 1.8114e-04
Loss = 3.7848e-01, PNorm = 62.1481, GNorm = 1.6378, lr_0 = 1.8101e-04
Loss = 3.5166e-01, PNorm = 62.1495, GNorm = 1.5780, lr_0 = 1.8089e-04
Loss = 3.7537e-01, PNorm = 62.1533, GNorm = 1.8198, lr_0 = 1.8076e-04
Loss = 3.7538e-01, PNorm = 62.1557, GNorm = 1.3349, lr_0 = 1.8064e-04
Loss = 3.2216e-01, PNorm = 62.1603, GNorm = 1.5213, lr_0 = 1.8052e-04
Loss = 3.2173e-01, PNorm = 62.1629, GNorm = 1.3175, lr_0 = 1.8039e-04
Loss = 3.2285e-01, PNorm = 62.1633, GNorm = 1.2651, lr_0 = 1.8027e-04
Loss = 3.7899e-01, PNorm = 62.1637, GNorm = 1.2366, lr_0 = 1.8015e-04
Loss = 3.3044e-01, PNorm = 62.1664, GNorm = 1.5188, lr_0 = 1.8002e-04
Loss = 3.8431e-01, PNorm = 62.1669, GNorm = 1.3864, lr_0 = 1.7990e-04
Loss = 3.9983e-01, PNorm = 62.1682, GNorm = 1.4319, lr_0 = 1.7978e-04
Loss = 3.0505e-01, PNorm = 62.1715, GNorm = 1.3793, lr_0 = 1.7965e-04
Loss = 3.5068e-01, PNorm = 62.1729, GNorm = 1.9489, lr_0 = 1.7953e-04
Loss = 4.0523e-01, PNorm = 62.1743, GNorm = 1.6859, lr_0 = 1.7941e-04
Loss = 3.8392e-01, PNorm = 62.1794, GNorm = 0.9323, lr_0 = 1.7928e-04
Loss = 3.6200e-01, PNorm = 62.1792, GNorm = 1.2800, lr_0 = 1.7916e-04
Loss = 3.9316e-01, PNorm = 62.1793, GNorm = 1.3779, lr_0 = 1.7904e-04
Loss = 3.1590e-01, PNorm = 62.1831, GNorm = 1.0507, lr_0 = 1.7892e-04
Loss = 3.7516e-01, PNorm = 62.1869, GNorm = 1.6646, lr_0 = 1.7879e-04
Loss = 3.1215e-01, PNorm = 62.1875, GNorm = 1.1821, lr_0 = 1.7867e-04
Loss = 3.7988e-01, PNorm = 62.1879, GNorm = 2.0031, lr_0 = 1.7855e-04
Loss = 3.6965e-01, PNorm = 62.1904, GNorm = 1.4605, lr_0 = 1.7843e-04
Loss = 3.6963e-01, PNorm = 62.1922, GNorm = 1.2254, lr_0 = 1.7830e-04
Loss = 3.4898e-01, PNorm = 62.1958, GNorm = 1.7460, lr_0 = 1.7818e-04
Loss = 3.6769e-01, PNorm = 62.2003, GNorm = 1.5185, lr_0 = 1.7806e-04
Loss = 3.4575e-01, PNorm = 62.2033, GNorm = 1.2255, lr_0 = 1.7794e-04
Loss = 3.7379e-01, PNorm = 62.2047, GNorm = 1.5283, lr_0 = 1.7782e-04
Validation mae = 0.111003
Epoch 23
Loss = 3.3558e-01, PNorm = 62.2053, GNorm = 1.7451, lr_0 = 1.7769e-04
Loss = 3.3063e-01, PNorm = 62.2067, GNorm = 1.2279, lr_0 = 1.7757e-04
Loss = 3.5496e-01, PNorm = 62.2069, GNorm = 1.6169, lr_0 = 1.7745e-04
Loss = 3.7494e-01, PNorm = 62.2088, GNorm = 1.0187, lr_0 = 1.7733e-04
Loss = 3.7285e-01, PNorm = 62.2079, GNorm = 2.1224, lr_0 = 1.7721e-04
Loss = 3.8043e-01, PNorm = 62.2101, GNorm = 1.1879, lr_0 = 1.7709e-04
Loss = 3.6281e-01, PNorm = 62.2138, GNorm = 1.4303, lr_0 = 1.7696e-04
Loss = 3.2681e-01, PNorm = 62.2162, GNorm = 1.5111, lr_0 = 1.7684e-04
Loss = 3.1892e-01, PNorm = 62.2198, GNorm = 1.4833, lr_0 = 1.7672e-04
Loss = 3.5656e-01, PNorm = 62.2203, GNorm = 1.7559, lr_0 = 1.7660e-04
Loss = 3.4414e-01, PNorm = 62.2202, GNorm = 1.5580, lr_0 = 1.7648e-04
Loss = 3.0569e-01, PNorm = 62.2230, GNorm = 1.2319, lr_0 = 1.7636e-04
Loss = 3.2489e-01, PNorm = 62.2236, GNorm = 1.2383, lr_0 = 1.7624e-04
Loss = 3.4025e-01, PNorm = 62.2243, GNorm = 1.5828, lr_0 = 1.7612e-04
Loss = 3.3626e-01, PNorm = 62.2265, GNorm = 1.3970, lr_0 = 1.7600e-04
Loss = 3.2148e-01, PNorm = 62.2304, GNorm = 1.2675, lr_0 = 1.7588e-04
Loss = 3.6555e-01, PNorm = 62.2305, GNorm = 1.7274, lr_0 = 1.7576e-04
Loss = 2.8805e-01, PNorm = 62.2321, GNorm = 0.9948, lr_0 = 1.7564e-04
Loss = 3.2171e-01, PNorm = 62.2332, GNorm = 1.4433, lr_0 = 1.7552e-04
Loss = 4.6536e-01, PNorm = 62.2346, GNorm = 1.7351, lr_0 = 1.7540e-04
Loss = 4.0520e-01, PNorm = 62.2367, GNorm = 1.3539, lr_0 = 1.7528e-04
Loss = 3.5651e-01, PNorm = 62.2396, GNorm = 1.5984, lr_0 = 1.7516e-04
Loss = 3.5059e-01, PNorm = 62.2394, GNorm = 1.4334, lr_0 = 1.7504e-04
Loss = 2.8894e-01, PNorm = 62.2401, GNorm = 1.1665, lr_0 = 1.7492e-04
Loss = 4.0477e-01, PNorm = 62.2422, GNorm = 1.5731, lr_0 = 1.7480e-04
Loss = 3.6025e-01, PNorm = 62.2456, GNorm = 1.9983, lr_0 = 1.7468e-04
Loss = 3.4309e-01, PNorm = 62.2471, GNorm = 1.5657, lr_0 = 1.7456e-04
Loss = 3.6921e-01, PNorm = 62.2467, GNorm = 1.3296, lr_0 = 1.7444e-04
Loss = 3.3637e-01, PNorm = 62.2466, GNorm = 1.0738, lr_0 = 1.7432e-04
Loss = 4.0225e-01, PNorm = 62.2487, GNorm = 1.2133, lr_0 = 1.7420e-04
Loss = 4.0103e-01, PNorm = 62.2509, GNorm = 0.9810, lr_0 = 1.7408e-04
Loss = 3.8331e-01, PNorm = 62.2520, GNorm = 1.8917, lr_0 = 1.7396e-04
Loss = 3.4991e-01, PNorm = 62.2543, GNorm = 2.5482, lr_0 = 1.7384e-04
Loss = 3.6603e-01, PNorm = 62.2546, GNorm = 1.6299, lr_0 = 1.7372e-04
Loss = 4.1096e-01, PNorm = 62.2557, GNorm = 1.4186, lr_0 = 1.7360e-04
Loss = 3.8107e-01, PNorm = 62.2577, GNorm = 1.2719, lr_0 = 1.7348e-04
Loss = 3.5566e-01, PNorm = 62.2585, GNorm = 1.3858, lr_0 = 1.7336e-04
Loss = 3.3350e-01, PNorm = 62.2601, GNorm = 1.6627, lr_0 = 1.7325e-04
Loss = 3.2103e-01, PNorm = 62.2621, GNorm = 1.3228, lr_0 = 1.7313e-04
Loss = 3.2684e-01, PNorm = 62.2626, GNorm = 1.5559, lr_0 = 1.7301e-04
Loss = 3.2462e-01, PNorm = 62.2659, GNorm = 1.3346, lr_0 = 1.7289e-04
Loss = 3.5136e-01, PNorm = 62.2650, GNorm = 1.4570, lr_0 = 1.7277e-04
Loss = 2.9947e-01, PNorm = 62.2651, GNorm = 1.5805, lr_0 = 1.7265e-04
Loss = 3.4239e-01, PNorm = 62.2657, GNorm = 1.5538, lr_0 = 1.7253e-04
Loss = 3.6473e-01, PNorm = 62.2666, GNorm = 1.6820, lr_0 = 1.7242e-04
Loss = 3.7476e-01, PNorm = 62.2701, GNorm = 1.5731, lr_0 = 1.7230e-04
Loss = 3.5551e-01, PNorm = 62.2728, GNorm = 1.6497, lr_0 = 1.7218e-04
Loss = 3.4790e-01, PNorm = 62.2732, GNorm = 1.1189, lr_0 = 1.7206e-04
Loss = 3.6118e-01, PNorm = 62.2741, GNorm = 1.5796, lr_0 = 1.7194e-04
Loss = 3.3033e-01, PNorm = 62.2755, GNorm = 1.3557, lr_0 = 1.7183e-04
Loss = 3.2411e-01, PNorm = 62.2767, GNorm = 1.5427, lr_0 = 1.7171e-04
Loss = 3.8030e-01, PNorm = 62.2806, GNorm = 1.0988, lr_0 = 1.7159e-04
Loss = 3.3183e-01, PNorm = 62.2825, GNorm = 1.6253, lr_0 = 1.7147e-04
Loss = 3.9016e-01, PNorm = 62.2823, GNorm = 1.7224, lr_0 = 1.7136e-04
Loss = 3.2606e-01, PNorm = 62.2856, GNorm = 1.2789, lr_0 = 1.7124e-04
Loss = 3.5680e-01, PNorm = 62.2862, GNorm = 1.3465, lr_0 = 1.7112e-04
Loss = 3.6457e-01, PNorm = 62.2880, GNorm = 2.5046, lr_0 = 1.7100e-04
Loss = 4.5332e-01, PNorm = 62.2924, GNorm = 1.5319, lr_0 = 1.7089e-04
Loss = 3.5548e-01, PNorm = 62.2939, GNorm = 2.0322, lr_0 = 1.7077e-04
Loss = 3.5308e-01, PNorm = 62.2961, GNorm = 1.7089, lr_0 = 1.7065e-04
Loss = 4.4481e-01, PNorm = 62.2983, GNorm = 4.3763, lr_0 = 1.7054e-04
Loss = 3.5644e-01, PNorm = 62.2985, GNorm = 1.4338, lr_0 = 1.7042e-04
Loss = 3.2536e-01, PNorm = 62.2992, GNorm = 1.4547, lr_0 = 1.7030e-04
Loss = 3.2131e-01, PNorm = 62.3013, GNorm = 2.0865, lr_0 = 1.7019e-04
Loss = 3.3227e-01, PNorm = 62.3030, GNorm = 1.0739, lr_0 = 1.7007e-04
Loss = 3.3374e-01, PNorm = 62.3040, GNorm = 1.2314, lr_0 = 1.6995e-04
Loss = 3.6266e-01, PNorm = 62.3055, GNorm = 1.4411, lr_0 = 1.6984e-04
Loss = 3.6045e-01, PNorm = 62.3092, GNorm = 1.3259, lr_0 = 1.6972e-04
Loss = 3.3370e-01, PNorm = 62.3085, GNorm = 1.4770, lr_0 = 1.6960e-04
Loss = 3.7890e-01, PNorm = 62.3094, GNorm = 1.7448, lr_0 = 1.6949e-04
Loss = 3.3582e-01, PNorm = 62.3108, GNorm = 1.2276, lr_0 = 1.6937e-04
Loss = 3.3514e-01, PNorm = 62.3116, GNorm = 1.8584, lr_0 = 1.6926e-04
Loss = 3.4653e-01, PNorm = 62.3139, GNorm = 2.0234, lr_0 = 1.6914e-04
Loss = 3.5944e-01, PNorm = 62.3158, GNorm = 1.7975, lr_0 = 1.6902e-04
Loss = 4.3304e-01, PNorm = 62.3189, GNorm = 1.4050, lr_0 = 1.6891e-04
Loss = 3.5592e-01, PNorm = 62.3178, GNorm = 1.6817, lr_0 = 1.6879e-04
Loss = 3.2063e-01, PNorm = 62.3181, GNorm = 1.2664, lr_0 = 1.6868e-04
Loss = 3.4584e-01, PNorm = 62.3202, GNorm = 1.2913, lr_0 = 1.6856e-04
Loss = 3.3610e-01, PNorm = 62.3229, GNorm = 1.2345, lr_0 = 1.6845e-04
Loss = 3.6750e-01, PNorm = 62.3232, GNorm = 1.2092, lr_0 = 1.6833e-04
Loss = 3.3551e-01, PNorm = 62.3226, GNorm = 1.3037, lr_0 = 1.6821e-04
Loss = 4.0740e-01, PNorm = 62.3246, GNorm = 1.5402, lr_0 = 1.6810e-04
Loss = 3.3911e-01, PNorm = 62.3248, GNorm = 1.3458, lr_0 = 1.6798e-04
Loss = 4.0214e-01, PNorm = 62.3265, GNorm = 1.5907, lr_0 = 1.6787e-04
Loss = 3.9025e-01, PNorm = 62.3291, GNorm = 1.5031, lr_0 = 1.6775e-04
Loss = 3.5223e-01, PNorm = 62.3323, GNorm = 1.4269, lr_0 = 1.6764e-04
Loss = 3.6105e-01, PNorm = 62.3354, GNorm = 1.4567, lr_0 = 1.6752e-04
Loss = 3.4998e-01, PNorm = 62.3371, GNorm = 1.7650, lr_0 = 1.6741e-04
Loss = 3.3115e-01, PNorm = 62.3379, GNorm = 1.6747, lr_0 = 1.6729e-04
Loss = 4.0314e-01, PNorm = 62.3381, GNorm = 2.1615, lr_0 = 1.6718e-04
Loss = 3.8797e-01, PNorm = 62.3412, GNorm = 1.5213, lr_0 = 1.6707e-04
Loss = 3.1887e-01, PNorm = 62.3439, GNorm = 2.0486, lr_0 = 1.6695e-04
Loss = 3.9401e-01, PNorm = 62.3443, GNorm = 1.3361, lr_0 = 1.6684e-04
Loss = 4.0963e-01, PNorm = 62.3472, GNorm = 1.3398, lr_0 = 1.6672e-04
Loss = 3.6290e-01, PNorm = 62.3475, GNorm = 1.9605, lr_0 = 1.6661e-04
Loss = 3.7246e-01, PNorm = 62.3471, GNorm = 1.1427, lr_0 = 1.6649e-04
Loss = 3.8479e-01, PNorm = 62.3508, GNorm = 1.5820, lr_0 = 1.6638e-04
Loss = 3.3805e-01, PNorm = 62.3536, GNorm = 1.8187, lr_0 = 1.6627e-04
Loss = 3.6262e-01, PNorm = 62.3552, GNorm = 1.4897, lr_0 = 1.6615e-04
Loss = 3.5342e-01, PNorm = 62.3574, GNorm = 1.4986, lr_0 = 1.6604e-04
Loss = 3.6336e-01, PNorm = 62.3599, GNorm = 1.3923, lr_0 = 1.6592e-04
Loss = 3.8505e-01, PNorm = 62.3612, GNorm = 1.6166, lr_0 = 1.6581e-04
Loss = 3.8362e-01, PNorm = 62.3627, GNorm = 1.6027, lr_0 = 1.6570e-04
Loss = 4.0968e-01, PNorm = 62.3625, GNorm = 2.2413, lr_0 = 1.6558e-04
Loss = 3.7306e-01, PNorm = 62.3645, GNorm = 1.4771, lr_0 = 1.6547e-04
Loss = 3.1832e-01, PNorm = 62.3680, GNorm = 1.1650, lr_0 = 1.6536e-04
Loss = 3.5579e-01, PNorm = 62.3706, GNorm = 1.8953, lr_0 = 1.6524e-04
Loss = 3.1782e-01, PNorm = 62.3724, GNorm = 1.3484, lr_0 = 1.6513e-04
Loss = 3.1712e-01, PNorm = 62.3734, GNorm = 1.2345, lr_0 = 1.6502e-04
Loss = 3.4838e-01, PNorm = 62.3770, GNorm = 1.9444, lr_0 = 1.6490e-04
Loss = 3.5149e-01, PNorm = 62.3797, GNorm = 1.2190, lr_0 = 1.6479e-04
Loss = 3.4431e-01, PNorm = 62.3816, GNorm = 1.6066, lr_0 = 1.6468e-04
Loss = 3.5369e-01, PNorm = 62.3804, GNorm = 1.1557, lr_0 = 1.6457e-04
Loss = 3.3484e-01, PNorm = 62.3825, GNorm = 1.3875, lr_0 = 1.6445e-04
Loss = 3.2448e-01, PNorm = 62.3854, GNorm = 1.1892, lr_0 = 1.6434e-04
Loss = 3.5232e-01, PNorm = 62.3871, GNorm = 1.1698, lr_0 = 1.6423e-04
Loss = 2.9834e-01, PNorm = 62.3880, GNorm = 1.9737, lr_0 = 1.6412e-04
Loss = 3.4202e-01, PNorm = 62.3897, GNorm = 1.6299, lr_0 = 1.6400e-04
Loss = 3.4807e-01, PNorm = 62.3915, GNorm = 2.3040, lr_0 = 1.6389e-04
Loss = 3.2700e-01, PNorm = 62.3931, GNorm = 1.9427, lr_0 = 1.6378e-04
Validation mae = 0.110875
Epoch 24
Loss = 3.5301e-01, PNorm = 62.3941, GNorm = 1.7537, lr_0 = 1.6367e-04
Loss = 3.5594e-01, PNorm = 62.3957, GNorm = 1.3127, lr_0 = 1.6355e-04
Loss = 3.8704e-01, PNorm = 62.3967, GNorm = 1.4812, lr_0 = 1.6344e-04
Loss = 3.5221e-01, PNorm = 62.4016, GNorm = 1.5252, lr_0 = 1.6333e-04
Loss = 2.8687e-01, PNorm = 62.4037, GNorm = 1.2287, lr_0 = 1.6322e-04
Loss = 3.2499e-01, PNorm = 62.4029, GNorm = 1.4647, lr_0 = 1.6311e-04
Loss = 2.8588e-01, PNorm = 62.4023, GNorm = 1.5067, lr_0 = 1.6299e-04
Loss = 3.0525e-01, PNorm = 62.4023, GNorm = 1.2322, lr_0 = 1.6288e-04
Loss = 3.7427e-01, PNorm = 62.4046, GNorm = 1.5786, lr_0 = 1.6277e-04
Loss = 3.8289e-01, PNorm = 62.4057, GNorm = 1.4423, lr_0 = 1.6266e-04
Loss = 3.6734e-01, PNorm = 62.4048, GNorm = 1.3969, lr_0 = 1.6255e-04
Loss = 3.3122e-01, PNorm = 62.4050, GNorm = 1.4502, lr_0 = 1.6244e-04
Loss = 3.7299e-01, PNorm = 62.4069, GNorm = 1.9041, lr_0 = 1.6233e-04
Loss = 4.1203e-01, PNorm = 62.4076, GNorm = 1.5590, lr_0 = 1.6221e-04
Loss = 3.8443e-01, PNorm = 62.4107, GNorm = 1.3957, lr_0 = 1.6210e-04
Loss = 3.5842e-01, PNorm = 62.4145, GNorm = 1.2552, lr_0 = 1.6199e-04
Loss = 3.7771e-01, PNorm = 62.4140, GNorm = 1.5728, lr_0 = 1.6188e-04
Loss = 3.2892e-01, PNorm = 62.4161, GNorm = 1.4669, lr_0 = 1.6177e-04
Loss = 3.2229e-01, PNorm = 62.4201, GNorm = 1.4366, lr_0 = 1.6166e-04
Loss = 4.1368e-01, PNorm = 62.4214, GNorm = 1.9528, lr_0 = 1.6155e-04
Loss = 3.5212e-01, PNorm = 62.4231, GNorm = 2.0997, lr_0 = 1.6144e-04
Loss = 3.4234e-01, PNorm = 62.4248, GNorm = 1.6156, lr_0 = 1.6133e-04
Loss = 3.2319e-01, PNorm = 62.4263, GNorm = 1.2504, lr_0 = 1.6122e-04
Loss = 3.4835e-01, PNorm = 62.4280, GNorm = 1.8559, lr_0 = 1.6111e-04
Loss = 3.4547e-01, PNorm = 62.4319, GNorm = 1.1657, lr_0 = 1.6100e-04
Loss = 3.8198e-01, PNorm = 62.4341, GNorm = 1.5451, lr_0 = 1.6089e-04
Loss = 3.8615e-01, PNorm = 62.4383, GNorm = 1.4313, lr_0 = 1.6078e-04
Loss = 3.0913e-01, PNorm = 62.4422, GNorm = 1.7608, lr_0 = 1.6067e-04
Loss = 3.6879e-01, PNorm = 62.4415, GNorm = 1.6786, lr_0 = 1.6056e-04
Loss = 3.4662e-01, PNorm = 62.4432, GNorm = 1.1671, lr_0 = 1.6045e-04
Loss = 3.7654e-01, PNorm = 62.4424, GNorm = 1.4353, lr_0 = 1.6034e-04
Loss = 3.2985e-01, PNorm = 62.4440, GNorm = 1.5780, lr_0 = 1.6023e-04
Loss = 3.3870e-01, PNorm = 62.4447, GNorm = 1.3971, lr_0 = 1.6012e-04
Loss = 3.5670e-01, PNorm = 62.4440, GNorm = 1.4636, lr_0 = 1.6001e-04
Loss = 3.3246e-01, PNorm = 62.4467, GNorm = 1.3411, lr_0 = 1.5990e-04
Loss = 3.1043e-01, PNorm = 62.4484, GNorm = 1.3677, lr_0 = 1.5979e-04
Loss = 3.7632e-01, PNorm = 62.4490, GNorm = 1.4476, lr_0 = 1.5968e-04
Loss = 3.7064e-01, PNorm = 62.4499, GNorm = 1.5888, lr_0 = 1.5957e-04
Loss = 3.5762e-01, PNorm = 62.4511, GNorm = 1.2914, lr_0 = 1.5946e-04
Loss = 3.0850e-01, PNorm = 62.4505, GNorm = 1.8890, lr_0 = 1.5935e-04
Loss = 3.8319e-01, PNorm = 62.4483, GNorm = 1.4731, lr_0 = 1.5924e-04
Loss = 3.5862e-01, PNorm = 62.4517, GNorm = 1.3675, lr_0 = 1.5913e-04
Loss = 3.3094e-01, PNorm = 62.4547, GNorm = 1.7226, lr_0 = 1.5902e-04
Loss = 3.2783e-01, PNorm = 62.4541, GNorm = 1.2875, lr_0 = 1.5891e-04
Loss = 3.7557e-01, PNorm = 62.4551, GNorm = 1.2596, lr_0 = 1.5880e-04
Loss = 3.5555e-01, PNorm = 62.4590, GNorm = 0.8578, lr_0 = 1.5870e-04
Loss = 4.5183e-01, PNorm = 62.4606, GNorm = 2.2484, lr_0 = 1.5859e-04
Loss = 3.8442e-01, PNorm = 62.4609, GNorm = 1.6525, lr_0 = 1.5848e-04
Loss = 3.8914e-01, PNorm = 62.4612, GNorm = 1.9518, lr_0 = 1.5837e-04
Loss = 3.4559e-01, PNorm = 62.4634, GNorm = 1.5381, lr_0 = 1.5826e-04
Loss = 4.3087e-01, PNorm = 62.4655, GNorm = 1.7928, lr_0 = 1.5815e-04
Loss = 3.7083e-01, PNorm = 62.4665, GNorm = 1.8246, lr_0 = 1.5804e-04
Loss = 3.0559e-01, PNorm = 62.4691, GNorm = 1.3451, lr_0 = 1.5794e-04
Loss = 3.2254e-01, PNorm = 62.4705, GNorm = 1.1394, lr_0 = 1.5783e-04
Loss = 3.0811e-01, PNorm = 62.4704, GNorm = 1.7642, lr_0 = 1.5772e-04
Loss = 3.7464e-01, PNorm = 62.4697, GNorm = 1.8004, lr_0 = 1.5761e-04
Loss = 3.8501e-01, PNorm = 62.4706, GNorm = 1.6705, lr_0 = 1.5750e-04
Loss = 3.6091e-01, PNorm = 62.4734, GNorm = 1.4595, lr_0 = 1.5740e-04
Loss = 3.2820e-01, PNorm = 62.4744, GNorm = 1.5294, lr_0 = 1.5729e-04
Loss = 3.4234e-01, PNorm = 62.4766, GNorm = 1.5308, lr_0 = 1.5718e-04
Loss = 3.8591e-01, PNorm = 62.4768, GNorm = 1.9498, lr_0 = 1.5707e-04
Loss = 3.5063e-01, PNorm = 62.4802, GNorm = 1.4401, lr_0 = 1.5697e-04
Loss = 3.2395e-01, PNorm = 62.4811, GNorm = 1.4926, lr_0 = 1.5686e-04
Loss = 3.9511e-01, PNorm = 62.4830, GNorm = 1.5178, lr_0 = 1.5675e-04
Loss = 3.2568e-01, PNorm = 62.4856, GNorm = 2.3450, lr_0 = 1.5664e-04
Loss = 3.5408e-01, PNorm = 62.4879, GNorm = 1.3125, lr_0 = 1.5654e-04
Loss = 3.3508e-01, PNorm = 62.4890, GNorm = 1.3746, lr_0 = 1.5643e-04
Loss = 3.5909e-01, PNorm = 62.4925, GNorm = 1.1965, lr_0 = 1.5632e-04
Loss = 3.7066e-01, PNorm = 62.4957, GNorm = 1.0986, lr_0 = 1.5621e-04
Loss = 3.4563e-01, PNorm = 62.4971, GNorm = 1.3835, lr_0 = 1.5611e-04
Loss = 3.9074e-01, PNorm = 62.4989, GNorm = 1.9457, lr_0 = 1.5600e-04
Loss = 3.4959e-01, PNorm = 62.4991, GNorm = 1.2296, lr_0 = 1.5589e-04
Loss = 4.0100e-01, PNorm = 62.5006, GNorm = 1.0421, lr_0 = 1.5579e-04
Loss = 3.6398e-01, PNorm = 62.5008, GNorm = 1.5762, lr_0 = 1.5568e-04
Loss = 3.1948e-01, PNorm = 62.5025, GNorm = 1.8865, lr_0 = 1.5557e-04
Loss = 3.4344e-01, PNorm = 62.5036, GNorm = 2.0272, lr_0 = 1.5547e-04
Loss = 3.4795e-01, PNorm = 62.5051, GNorm = 1.1141, lr_0 = 1.5536e-04
Loss = 3.3874e-01, PNorm = 62.5068, GNorm = 1.3167, lr_0 = 1.5525e-04
Loss = 3.5635e-01, PNorm = 62.5123, GNorm = 1.1017, lr_0 = 1.5515e-04
Loss = 4.0942e-01, PNorm = 62.5163, GNorm = 1.5191, lr_0 = 1.5504e-04
Loss = 3.9377e-01, PNorm = 62.5176, GNorm = 1.1938, lr_0 = 1.5493e-04
Loss = 3.4012e-01, PNorm = 62.5175, GNorm = 1.3856, lr_0 = 1.5483e-04
Loss = 3.7216e-01, PNorm = 62.5178, GNorm = 1.9769, lr_0 = 1.5472e-04
Loss = 4.0032e-01, PNorm = 62.5175, GNorm = 1.4596, lr_0 = 1.5462e-04
Loss = 3.3223e-01, PNorm = 62.5203, GNorm = 1.0072, lr_0 = 1.5451e-04
Loss = 3.6635e-01, PNorm = 62.5217, GNorm = 1.3652, lr_0 = 1.5440e-04
Loss = 3.2262e-01, PNorm = 62.5232, GNorm = 1.6419, lr_0 = 1.5430e-04
Loss = 3.2826e-01, PNorm = 62.5235, GNorm = 1.4212, lr_0 = 1.5419e-04
Loss = 3.5765e-01, PNorm = 62.5240, GNorm = 1.4419, lr_0 = 1.5409e-04
Loss = 3.8013e-01, PNorm = 62.5262, GNorm = 1.0325, lr_0 = 1.5398e-04
Loss = 3.5037e-01, PNorm = 62.5281, GNorm = 1.6719, lr_0 = 1.5388e-04
Loss = 3.2909e-01, PNorm = 62.5291, GNorm = 1.1721, lr_0 = 1.5377e-04
Loss = 3.4888e-01, PNorm = 62.5306, GNorm = 1.6150, lr_0 = 1.5367e-04
Loss = 3.9878e-01, PNorm = 62.5317, GNorm = 1.8469, lr_0 = 1.5356e-04
Loss = 3.6128e-01, PNorm = 62.5338, GNorm = 1.5026, lr_0 = 1.5346e-04
Loss = 3.4302e-01, PNorm = 62.5369, GNorm = 1.5204, lr_0 = 1.5335e-04
Loss = 3.3604e-01, PNorm = 62.5390, GNorm = 1.2153, lr_0 = 1.5325e-04
Loss = 3.0213e-01, PNorm = 62.5388, GNorm = 1.1414, lr_0 = 1.5314e-04
Loss = 3.4723e-01, PNorm = 62.5394, GNorm = 1.0240, lr_0 = 1.5304e-04
Loss = 3.3349e-01, PNorm = 62.5399, GNorm = 1.3501, lr_0 = 1.5293e-04
Loss = 3.8895e-01, PNorm = 62.5400, GNorm = 1.2641, lr_0 = 1.5283e-04
Loss = 3.6986e-01, PNorm = 62.5423, GNorm = 1.5773, lr_0 = 1.5272e-04
Loss = 3.8411e-01, PNorm = 62.5446, GNorm = 2.2068, lr_0 = 1.5262e-04
Loss = 3.3877e-01, PNorm = 62.5460, GNorm = 1.2951, lr_0 = 1.5251e-04
Loss = 3.1163e-01, PNorm = 62.5450, GNorm = 1.3414, lr_0 = 1.5241e-04
Loss = 4.1119e-01, PNorm = 62.5452, GNorm = 1.4430, lr_0 = 1.5230e-04
Loss = 3.3981e-01, PNorm = 62.5472, GNorm = 1.3393, lr_0 = 1.5220e-04
Loss = 3.4129e-01, PNorm = 62.5475, GNorm = 1.5332, lr_0 = 1.5209e-04
Loss = 3.6457e-01, PNorm = 62.5478, GNorm = 1.9216, lr_0 = 1.5199e-04
Loss = 3.5570e-01, PNorm = 62.5493, GNorm = 1.5566, lr_0 = 1.5189e-04
Loss = 3.4196e-01, PNorm = 62.5514, GNorm = 1.7243, lr_0 = 1.5178e-04
Loss = 3.4008e-01, PNorm = 62.5518, GNorm = 1.5802, lr_0 = 1.5168e-04
Loss = 3.3291e-01, PNorm = 62.5527, GNorm = 1.4115, lr_0 = 1.5157e-04
Loss = 3.6731e-01, PNorm = 62.5565, GNorm = 1.2345, lr_0 = 1.5147e-04
Loss = 3.3690e-01, PNorm = 62.5567, GNorm = 1.4950, lr_0 = 1.5137e-04
Loss = 3.4223e-01, PNorm = 62.5575, GNorm = 1.3131, lr_0 = 1.5126e-04
Loss = 3.5590e-01, PNorm = 62.5592, GNorm = 1.1852, lr_0 = 1.5116e-04
Loss = 3.6766e-01, PNorm = 62.5610, GNorm = 1.3314, lr_0 = 1.5106e-04
Loss = 3.4444e-01, PNorm = 62.5600, GNorm = 0.9955, lr_0 = 1.5095e-04
Loss = 3.4695e-01, PNorm = 62.5611, GNorm = 1.5912, lr_0 = 1.5085e-04
Validation mae = 0.111555
Epoch 25
Loss = 3.4162e-01, PNorm = 62.5642, GNorm = 1.1709, lr_0 = 1.5075e-04
Loss = 3.1264e-01, PNorm = 62.5645, GNorm = 1.1448, lr_0 = 1.5064e-04
Loss = 3.1611e-01, PNorm = 62.5653, GNorm = 1.2744, lr_0 = 1.5054e-04
Loss = 3.3832e-01, PNorm = 62.5667, GNorm = 1.3541, lr_0 = 1.5044e-04
Loss = 3.3333e-01, PNorm = 62.5669, GNorm = 1.1531, lr_0 = 1.5033e-04
Loss = 3.5176e-01, PNorm = 62.5680, GNorm = 1.3148, lr_0 = 1.5023e-04
Loss = 3.3981e-01, PNorm = 62.5714, GNorm = 1.3484, lr_0 = 1.5013e-04
Loss = 3.8487e-01, PNorm = 62.5749, GNorm = 1.7445, lr_0 = 1.5002e-04
Loss = 2.9389e-01, PNorm = 62.5775, GNorm = 1.0624, lr_0 = 1.4992e-04
Loss = 3.1384e-01, PNorm = 62.5774, GNorm = 1.1787, lr_0 = 1.4982e-04
Loss = 3.5190e-01, PNorm = 62.5798, GNorm = 1.3769, lr_0 = 1.4972e-04
Loss = 3.6495e-01, PNorm = 62.5811, GNorm = 1.6927, lr_0 = 1.4961e-04
Loss = 3.1425e-01, PNorm = 62.5819, GNorm = 1.4458, lr_0 = 1.4951e-04
Loss = 3.7991e-01, PNorm = 62.5835, GNorm = 1.2729, lr_0 = 1.4941e-04
Loss = 3.6828e-01, PNorm = 62.5827, GNorm = 1.5463, lr_0 = 1.4931e-04
Loss = 3.1292e-01, PNorm = 62.5828, GNorm = 1.6519, lr_0 = 1.4920e-04
Loss = 3.4130e-01, PNorm = 62.5832, GNorm = 1.2732, lr_0 = 1.4910e-04
Loss = 3.4248e-01, PNorm = 62.5836, GNorm = 1.2439, lr_0 = 1.4900e-04
Loss = 3.1347e-01, PNorm = 62.5857, GNorm = 1.7279, lr_0 = 1.4890e-04
Loss = 3.7109e-01, PNorm = 62.5871, GNorm = 2.3017, lr_0 = 1.4880e-04
Loss = 3.2528e-01, PNorm = 62.5887, GNorm = 2.2968, lr_0 = 1.4869e-04
Loss = 3.3390e-01, PNorm = 62.5907, GNorm = 1.2741, lr_0 = 1.4859e-04
Loss = 3.1236e-01, PNorm = 62.5907, GNorm = 1.2675, lr_0 = 1.4849e-04
Loss = 3.1756e-01, PNorm = 62.5919, GNorm = 1.5385, lr_0 = 1.4839e-04
Loss = 3.4012e-01, PNorm = 62.5942, GNorm = 1.4310, lr_0 = 1.4829e-04
Loss = 3.1056e-01, PNorm = 62.5956, GNorm = 1.3035, lr_0 = 1.4818e-04
Loss = 3.5970e-01, PNorm = 62.5980, GNorm = 1.3232, lr_0 = 1.4808e-04
Loss = 3.2443e-01, PNorm = 62.6001, GNorm = 1.3766, lr_0 = 1.4798e-04
Loss = 3.1545e-01, PNorm = 62.6015, GNorm = 1.7017, lr_0 = 1.4788e-04
Loss = 3.1330e-01, PNorm = 62.6035, GNorm = 1.1410, lr_0 = 1.4778e-04
Loss = 4.1981e-01, PNorm = 62.6052, GNorm = 2.0650, lr_0 = 1.4768e-04
Loss = 3.7784e-01, PNorm = 62.6082, GNorm = 1.2434, lr_0 = 1.4758e-04
Loss = 2.8268e-01, PNorm = 62.6115, GNorm = 1.3160, lr_0 = 1.4748e-04
Loss = 3.9600e-01, PNorm = 62.6116, GNorm = 1.7505, lr_0 = 1.4737e-04
Loss = 3.2059e-01, PNorm = 62.6124, GNorm = 1.2601, lr_0 = 1.4727e-04
Loss = 3.5776e-01, PNorm = 62.6143, GNorm = 1.2739, lr_0 = 1.4717e-04
Loss = 3.3106e-01, PNorm = 62.6147, GNorm = 1.4675, lr_0 = 1.4707e-04
Loss = 3.6727e-01, PNorm = 62.6159, GNorm = 1.4364, lr_0 = 1.4697e-04
Loss = 3.4069e-01, PNorm = 62.6156, GNorm = 1.0761, lr_0 = 1.4687e-04
Loss = 3.3269e-01, PNorm = 62.6157, GNorm = 1.4094, lr_0 = 1.4677e-04
Loss = 3.8117e-01, PNorm = 62.6178, GNorm = 2.0588, lr_0 = 1.4667e-04
Loss = 3.5741e-01, PNorm = 62.6194, GNorm = 1.7784, lr_0 = 1.4657e-04
Loss = 4.0080e-01, PNorm = 62.6199, GNorm = 1.6154, lr_0 = 1.4647e-04
Loss = 3.1605e-01, PNorm = 62.6225, GNorm = 1.1396, lr_0 = 1.4637e-04
Loss = 3.4550e-01, PNorm = 62.6232, GNorm = 1.5596, lr_0 = 1.4627e-04
Loss = 3.9063e-01, PNorm = 62.6233, GNorm = 1.4811, lr_0 = 1.4617e-04
Loss = 3.6687e-01, PNorm = 62.6262, GNorm = 1.7088, lr_0 = 1.4607e-04
Loss = 3.0822e-01, PNorm = 62.6274, GNorm = 1.4567, lr_0 = 1.4597e-04
Loss = 3.9555e-01, PNorm = 62.6279, GNorm = 1.1304, lr_0 = 1.4587e-04
Loss = 3.6382e-01, PNorm = 62.6303, GNorm = 1.4976, lr_0 = 1.4577e-04
Loss = 3.4088e-01, PNorm = 62.6321, GNorm = 1.2473, lr_0 = 1.4567e-04
Loss = 3.3046e-01, PNorm = 62.6331, GNorm = 1.5217, lr_0 = 1.4557e-04
Loss = 3.4749e-01, PNorm = 62.6341, GNorm = 2.3116, lr_0 = 1.4547e-04
Loss = 3.3626e-01, PNorm = 62.6343, GNorm = 1.7799, lr_0 = 1.4537e-04
Loss = 3.3117e-01, PNorm = 62.6343, GNorm = 1.5325, lr_0 = 1.4527e-04
Loss = 3.3831e-01, PNorm = 62.6336, GNorm = 1.5859, lr_0 = 1.4517e-04
Loss = 3.5428e-01, PNorm = 62.6327, GNorm = 1.7129, lr_0 = 1.4507e-04
Loss = 3.8143e-01, PNorm = 62.6327, GNorm = 1.5218, lr_0 = 1.4497e-04
Loss = 3.5172e-01, PNorm = 62.6330, GNorm = 1.7796, lr_0 = 1.4487e-04
Loss = 3.7377e-01, PNorm = 62.6374, GNorm = 2.4032, lr_0 = 1.4477e-04
Loss = 3.8611e-01, PNorm = 62.6419, GNorm = 2.8333, lr_0 = 1.4467e-04
Loss = 3.3649e-01, PNorm = 62.6419, GNorm = 1.6588, lr_0 = 1.4457e-04
Loss = 3.7442e-01, PNorm = 62.6444, GNorm = 2.0352, lr_0 = 1.4447e-04
Loss = 3.9787e-01, PNorm = 62.6468, GNorm = 1.6955, lr_0 = 1.4438e-04
Loss = 3.5245e-01, PNorm = 62.6467, GNorm = 1.5945, lr_0 = 1.4428e-04
Loss = 3.4811e-01, PNorm = 62.6501, GNorm = 1.2026, lr_0 = 1.4418e-04
Loss = 3.6069e-01, PNorm = 62.6521, GNorm = 1.1967, lr_0 = 1.4408e-04
Loss = 3.1614e-01, PNorm = 62.6535, GNorm = 1.8979, lr_0 = 1.4398e-04
Loss = 4.7192e-01, PNorm = 62.6546, GNorm = 1.6709, lr_0 = 1.4388e-04
Loss = 3.7355e-01, PNorm = 62.6568, GNorm = 1.3249, lr_0 = 1.4378e-04
Loss = 3.6781e-01, PNorm = 62.6592, GNorm = 1.7815, lr_0 = 1.4368e-04
Loss = 3.3593e-01, PNorm = 62.6599, GNorm = 1.5324, lr_0 = 1.4359e-04
Loss = 3.9107e-01, PNorm = 62.6616, GNorm = 1.5812, lr_0 = 1.4349e-04
Loss = 3.7931e-01, PNorm = 62.6621, GNorm = 1.7134, lr_0 = 1.4339e-04
Loss = 3.7914e-01, PNorm = 62.6621, GNorm = 1.7413, lr_0 = 1.4329e-04
Loss = 3.7961e-01, PNorm = 62.6641, GNorm = 1.3637, lr_0 = 1.4319e-04
Loss = 3.1469e-01, PNorm = 62.6666, GNorm = 1.3509, lr_0 = 1.4310e-04
Loss = 3.5803e-01, PNorm = 62.6688, GNorm = 1.7237, lr_0 = 1.4300e-04
Loss = 3.3614e-01, PNorm = 62.6711, GNorm = 1.4769, lr_0 = 1.4290e-04
Loss = 3.3472e-01, PNorm = 62.6740, GNorm = 1.2641, lr_0 = 1.4280e-04
Loss = 4.1319e-01, PNorm = 62.6765, GNorm = 1.7516, lr_0 = 1.4270e-04
Loss = 3.1785e-01, PNorm = 62.6771, GNorm = 1.1649, lr_0 = 1.4261e-04
Loss = 3.6375e-01, PNorm = 62.6772, GNorm = 1.9565, lr_0 = 1.4251e-04
Loss = 3.5488e-01, PNorm = 62.6780, GNorm = 0.9729, lr_0 = 1.4241e-04
Loss = 3.4444e-01, PNorm = 62.6802, GNorm = 1.6867, lr_0 = 1.4231e-04
Loss = 3.7780e-01, PNorm = 62.6812, GNorm = 2.1688, lr_0 = 1.4222e-04
Loss = 3.8662e-01, PNorm = 62.6844, GNorm = 1.9632, lr_0 = 1.4212e-04
Loss = 3.5183e-01, PNorm = 62.6865, GNorm = 1.2604, lr_0 = 1.4202e-04
Loss = 3.1324e-01, PNorm = 62.6861, GNorm = 1.2926, lr_0 = 1.4192e-04
Loss = 3.6837e-01, PNorm = 62.6867, GNorm = 2.0485, lr_0 = 1.4183e-04
Loss = 3.4606e-01, PNorm = 62.6878, GNorm = 1.7455, lr_0 = 1.4173e-04
Loss = 3.5085e-01, PNorm = 62.6894, GNorm = 1.4324, lr_0 = 1.4163e-04
Loss = 3.4417e-01, PNorm = 62.6899, GNorm = 1.6059, lr_0 = 1.4153e-04
Loss = 3.2900e-01, PNorm = 62.6908, GNorm = 1.2891, lr_0 = 1.4144e-04
Loss = 3.2287e-01, PNorm = 62.6947, GNorm = 1.4133, lr_0 = 1.4134e-04
Loss = 4.1049e-01, PNorm = 62.6961, GNorm = 1.1630, lr_0 = 1.4124e-04
Loss = 3.1719e-01, PNorm = 62.6970, GNorm = 2.1108, lr_0 = 1.4115e-04
Loss = 3.6332e-01, PNorm = 62.6987, GNorm = 1.0991, lr_0 = 1.4105e-04
Loss = 3.5381e-01, PNorm = 62.6973, GNorm = 1.4369, lr_0 = 1.4095e-04
Loss = 4.1444e-01, PNorm = 62.6976, GNorm = 1.7990, lr_0 = 1.4086e-04
Loss = 3.0631e-01, PNorm = 62.6995, GNorm = 1.4319, lr_0 = 1.4076e-04
Loss = 3.6253e-01, PNorm = 62.7018, GNorm = 1.4016, lr_0 = 1.4066e-04
Loss = 3.9441e-01, PNorm = 62.7028, GNorm = 0.9528, lr_0 = 1.4057e-04
Loss = 3.3023e-01, PNorm = 62.7064, GNorm = 1.3092, lr_0 = 1.4047e-04
Loss = 3.2089e-01, PNorm = 62.7091, GNorm = 1.9845, lr_0 = 1.4038e-04
Loss = 3.1596e-01, PNorm = 62.7072, GNorm = 1.3740, lr_0 = 1.4028e-04
Loss = 3.8764e-01, PNorm = 62.7084, GNorm = 1.2433, lr_0 = 1.4018e-04
Loss = 3.4570e-01, PNorm = 62.7105, GNorm = 1.4157, lr_0 = 1.4009e-04
Loss = 3.7636e-01, PNorm = 62.7117, GNorm = 2.1743, lr_0 = 1.3999e-04
Loss = 3.5642e-01, PNorm = 62.7122, GNorm = 1.4994, lr_0 = 1.3990e-04
Loss = 3.6780e-01, PNorm = 62.7128, GNorm = 1.9866, lr_0 = 1.3980e-04
Loss = 4.1093e-01, PNorm = 62.7153, GNorm = 1.4249, lr_0 = 1.3970e-04
Loss = 3.4324e-01, PNorm = 62.7155, GNorm = 1.2755, lr_0 = 1.3961e-04
Loss = 3.5108e-01, PNorm = 62.7188, GNorm = 1.6767, lr_0 = 1.3951e-04
Loss = 3.7135e-01, PNorm = 62.7200, GNorm = 2.0878, lr_0 = 1.3942e-04
Loss = 3.3494e-01, PNorm = 62.7203, GNorm = 1.4014, lr_0 = 1.3932e-04
Loss = 3.5331e-01, PNorm = 62.7209, GNorm = 1.7101, lr_0 = 1.3923e-04
Loss = 3.5543e-01, PNorm = 62.7222, GNorm = 1.2982, lr_0 = 1.3913e-04
Loss = 3.3705e-01, PNorm = 62.7243, GNorm = 1.5075, lr_0 = 1.3904e-04
Loss = 3.3669e-01, PNorm = 62.7247, GNorm = 1.4676, lr_0 = 1.3894e-04
Validation mae = 0.110481
Epoch 26
Loss = 3.7561e-01, PNorm = 62.7259, GNorm = 1.3419, lr_0 = 1.3884e-04
Loss = 3.5664e-01, PNorm = 62.7274, GNorm = 1.1970, lr_0 = 1.3875e-04
Loss = 3.4966e-01, PNorm = 62.7276, GNorm = 1.8909, lr_0 = 1.3865e-04
Loss = 4.1941e-01, PNorm = 62.7293, GNorm = 1.4028, lr_0 = 1.3856e-04
Loss = 3.4097e-01, PNorm = 62.7313, GNorm = 1.9831, lr_0 = 1.3846e-04
Loss = 3.9988e-01, PNorm = 62.7325, GNorm = 1.4748, lr_0 = 1.3837e-04
Loss = 3.5119e-01, PNorm = 62.7351, GNorm = 1.3446, lr_0 = 1.3828e-04
Loss = 3.8309e-01, PNorm = 62.7363, GNorm = 1.8113, lr_0 = 1.3818e-04
Loss = 3.5286e-01, PNorm = 62.7370, GNorm = 1.2917, lr_0 = 1.3809e-04
Loss = 3.8632e-01, PNorm = 62.7391, GNorm = 2.1554, lr_0 = 1.3799e-04
Loss = 3.5337e-01, PNorm = 62.7406, GNorm = 1.5079, lr_0 = 1.3790e-04
Loss = 3.1996e-01, PNorm = 62.7430, GNorm = 1.6154, lr_0 = 1.3780e-04
Loss = 3.2997e-01, PNorm = 62.7455, GNorm = 1.2858, lr_0 = 1.3771e-04
Loss = 3.3559e-01, PNorm = 62.7469, GNorm = 1.4859, lr_0 = 1.3761e-04
Loss = 3.2957e-01, PNorm = 62.7469, GNorm = 1.4022, lr_0 = 1.3752e-04
Loss = 2.9846e-01, PNorm = 62.7499, GNorm = 1.1166, lr_0 = 1.3742e-04
Loss = 3.3454e-01, PNorm = 62.7528, GNorm = 1.4186, lr_0 = 1.3733e-04
Loss = 3.6420e-01, PNorm = 62.7542, GNorm = 1.8016, lr_0 = 1.3724e-04
Loss = 3.1037e-01, PNorm = 62.7540, GNorm = 1.9673, lr_0 = 1.3714e-04
Loss = 3.6956e-01, PNorm = 62.7549, GNorm = 1.2214, lr_0 = 1.3705e-04
Loss = 3.4608e-01, PNorm = 62.7563, GNorm = 1.4352, lr_0 = 1.3695e-04
Loss = 3.4925e-01, PNorm = 62.7577, GNorm = 1.3143, lr_0 = 1.3686e-04
Loss = 3.9837e-01, PNorm = 62.7591, GNorm = 1.6431, lr_0 = 1.3677e-04
Loss = 3.7536e-01, PNorm = 62.7607, GNorm = 1.3243, lr_0 = 1.3667e-04
Loss = 3.0821e-01, PNorm = 62.7599, GNorm = 1.3301, lr_0 = 1.3658e-04
Loss = 3.9786e-01, PNorm = 62.7598, GNorm = 1.8661, lr_0 = 1.3649e-04
Loss = 4.0509e-01, PNorm = 62.7613, GNorm = 1.8573, lr_0 = 1.3639e-04
Loss = 3.2249e-01, PNorm = 62.7626, GNorm = 1.4449, lr_0 = 1.3630e-04
Loss = 3.3540e-01, PNorm = 62.7647, GNorm = 1.4624, lr_0 = 1.3621e-04
Loss = 3.1845e-01, PNorm = 62.7658, GNorm = 1.3962, lr_0 = 1.3611e-04
Loss = 3.2939e-01, PNorm = 62.7668, GNorm = 1.7121, lr_0 = 1.3602e-04
Loss = 3.6401e-01, PNorm = 62.7679, GNorm = 2.6688, lr_0 = 1.3593e-04
Loss = 3.6404e-01, PNorm = 62.7669, GNorm = 2.0867, lr_0 = 1.3583e-04
Loss = 3.3284e-01, PNorm = 62.7660, GNorm = 1.8458, lr_0 = 1.3574e-04
Loss = 3.2791e-01, PNorm = 62.7666, GNorm = 1.3428, lr_0 = 1.3565e-04
Loss = 3.4840e-01, PNorm = 62.7659, GNorm = 1.3132, lr_0 = 1.3555e-04
Loss = 3.3417e-01, PNorm = 62.7670, GNorm = 1.4949, lr_0 = 1.3546e-04
Loss = 3.8989e-01, PNorm = 62.7708, GNorm = 2.0017, lr_0 = 1.3537e-04
Loss = 3.0379e-01, PNorm = 62.7728, GNorm = 1.1682, lr_0 = 1.3528e-04
Loss = 3.4524e-01, PNorm = 62.7727, GNorm = 1.4553, lr_0 = 1.3518e-04
Loss = 3.2515e-01, PNorm = 62.7726, GNorm = 1.4096, lr_0 = 1.3509e-04
Loss = 3.1712e-01, PNorm = 62.7726, GNorm = 1.0718, lr_0 = 1.3500e-04
Loss = 3.4566e-01, PNorm = 62.7724, GNorm = 1.4656, lr_0 = 1.3491e-04
Loss = 3.2692e-01, PNorm = 62.7741, GNorm = 1.3502, lr_0 = 1.3481e-04
Loss = 3.5252e-01, PNorm = 62.7757, GNorm = 1.7527, lr_0 = 1.3472e-04
Loss = 3.5809e-01, PNorm = 62.7762, GNorm = 1.5993, lr_0 = 1.3463e-04
Loss = 3.5694e-01, PNorm = 62.7771, GNorm = 2.2808, lr_0 = 1.3454e-04
Loss = 3.5082e-01, PNorm = 62.7799, GNorm = 1.2854, lr_0 = 1.3444e-04
Loss = 3.4460e-01, PNorm = 62.7804, GNorm = 1.5529, lr_0 = 1.3435e-04
Loss = 3.3335e-01, PNorm = 62.7808, GNorm = 1.4972, lr_0 = 1.3426e-04
Loss = 3.4114e-01, PNorm = 62.7828, GNorm = 1.3457, lr_0 = 1.3417e-04
Loss = 3.6061e-01, PNorm = 62.7850, GNorm = 1.7087, lr_0 = 1.3408e-04
Loss = 3.4047e-01, PNorm = 62.7866, GNorm = 1.6402, lr_0 = 1.3398e-04
Loss = 3.1543e-01, PNorm = 62.7878, GNorm = 1.2391, lr_0 = 1.3389e-04
Loss = 3.1162e-01, PNorm = 62.7908, GNorm = 1.3003, lr_0 = 1.3380e-04
Loss = 3.3056e-01, PNorm = 62.7923, GNorm = 1.5105, lr_0 = 1.3371e-04
Loss = 3.4401e-01, PNorm = 62.7945, GNorm = 1.2412, lr_0 = 1.3362e-04
Loss = 3.2390e-01, PNorm = 62.7961, GNorm = 1.2824, lr_0 = 1.3353e-04
Loss = 4.0111e-01, PNorm = 62.7980, GNorm = 1.8173, lr_0 = 1.3343e-04
Loss = 3.7871e-01, PNorm = 62.7983, GNorm = 1.5923, lr_0 = 1.3334e-04
Loss = 3.5817e-01, PNorm = 62.8012, GNorm = 1.7937, lr_0 = 1.3325e-04
Loss = 3.3141e-01, PNorm = 62.8021, GNorm = 1.7052, lr_0 = 1.3316e-04
Loss = 3.7128e-01, PNorm = 62.8038, GNorm = 1.3547, lr_0 = 1.3307e-04
Loss = 3.8552e-01, PNorm = 62.8056, GNorm = 1.2508, lr_0 = 1.3298e-04
Loss = 3.3213e-01, PNorm = 62.8080, GNorm = 1.7999, lr_0 = 1.3289e-04
Loss = 3.4858e-01, PNorm = 62.8084, GNorm = 1.4610, lr_0 = 1.3280e-04
Loss = 3.6183e-01, PNorm = 62.8084, GNorm = 1.7335, lr_0 = 1.3270e-04
Loss = 3.4318e-01, PNorm = 62.8096, GNorm = 1.8398, lr_0 = 1.3261e-04
Loss = 3.4831e-01, PNorm = 62.8110, GNorm = 1.5341, lr_0 = 1.3252e-04
Loss = 3.2109e-01, PNorm = 62.8135, GNorm = 1.3215, lr_0 = 1.3243e-04
Loss = 3.4277e-01, PNorm = 62.8156, GNorm = 1.4080, lr_0 = 1.3234e-04
Loss = 3.4729e-01, PNorm = 62.8157, GNorm = 1.3568, lr_0 = 1.3225e-04
Loss = 3.6825e-01, PNorm = 62.8151, GNorm = 1.2898, lr_0 = 1.3216e-04
Loss = 3.6531e-01, PNorm = 62.8175, GNorm = 1.9397, lr_0 = 1.3207e-04
Loss = 3.3020e-01, PNorm = 62.8211, GNorm = 1.6671, lr_0 = 1.3198e-04
Loss = 3.2763e-01, PNorm = 62.8216, GNorm = 1.4372, lr_0 = 1.3189e-04
Loss = 3.3241e-01, PNorm = 62.8229, GNorm = 1.3710, lr_0 = 1.3180e-04
Loss = 3.7073e-01, PNorm = 62.8235, GNorm = 1.9213, lr_0 = 1.3171e-04
Loss = 3.4077e-01, PNorm = 62.8232, GNorm = 1.3672, lr_0 = 1.3162e-04
Loss = 3.4765e-01, PNorm = 62.8241, GNorm = 1.9934, lr_0 = 1.3153e-04
Loss = 3.1792e-01, PNorm = 62.8243, GNorm = 1.4957, lr_0 = 1.3144e-04
Loss = 3.3463e-01, PNorm = 62.8250, GNorm = 1.6212, lr_0 = 1.3135e-04
Loss = 3.6766e-01, PNorm = 62.8275, GNorm = 1.4119, lr_0 = 1.3126e-04
Loss = 3.0498e-01, PNorm = 62.8296, GNorm = 1.8796, lr_0 = 1.3117e-04
Loss = 3.3716e-01, PNorm = 62.8316, GNorm = 1.5859, lr_0 = 1.3108e-04
Loss = 3.2191e-01, PNorm = 62.8350, GNorm = 1.1908, lr_0 = 1.3099e-04
Loss = 3.6255e-01, PNorm = 62.8344, GNorm = 1.5692, lr_0 = 1.3090e-04
Loss = 3.1411e-01, PNorm = 62.8338, GNorm = 1.6175, lr_0 = 1.3081e-04
Loss = 3.3232e-01, PNorm = 62.8350, GNorm = 1.6843, lr_0 = 1.3072e-04
Loss = 3.0948e-01, PNorm = 62.8374, GNorm = 1.1195, lr_0 = 1.3063e-04
Loss = 3.5627e-01, PNorm = 62.8376, GNorm = 1.7529, lr_0 = 1.3054e-04
Loss = 3.4986e-01, PNorm = 62.8377, GNorm = 1.3876, lr_0 = 1.3045e-04
Loss = 3.4120e-01, PNorm = 62.8378, GNorm = 1.4413, lr_0 = 1.3036e-04
Loss = 3.2574e-01, PNorm = 62.8398, GNorm = 1.2968, lr_0 = 1.3027e-04
Loss = 3.5250e-01, PNorm = 62.8415, GNorm = 1.5033, lr_0 = 1.3018e-04
Loss = 3.8446e-01, PNorm = 62.8420, GNorm = 1.2683, lr_0 = 1.3009e-04
Loss = 3.7402e-01, PNorm = 62.8428, GNorm = 1.2138, lr_0 = 1.3000e-04
Loss = 3.4937e-01, PNorm = 62.8450, GNorm = 1.3198, lr_0 = 1.2992e-04
Loss = 3.6527e-01, PNorm = 62.8465, GNorm = 1.5438, lr_0 = 1.2983e-04
Loss = 3.5271e-01, PNorm = 62.8468, GNorm = 1.4380, lr_0 = 1.2974e-04
Loss = 3.6609e-01, PNorm = 62.8465, GNorm = 1.2477, lr_0 = 1.2965e-04
Loss = 3.4809e-01, PNorm = 62.8494, GNorm = 1.0625, lr_0 = 1.2956e-04
Loss = 3.3841e-01, PNorm = 62.8516, GNorm = 1.8525, lr_0 = 1.2947e-04
Loss = 3.5192e-01, PNorm = 62.8520, GNorm = 1.6801, lr_0 = 1.2938e-04
Loss = 3.1697e-01, PNorm = 62.8521, GNorm = 1.4821, lr_0 = 1.2929e-04
Loss = 3.9126e-01, PNorm = 62.8522, GNorm = 1.1098, lr_0 = 1.2921e-04
Loss = 3.6444e-01, PNorm = 62.8527, GNorm = 1.3832, lr_0 = 1.2912e-04
Loss = 3.7462e-01, PNorm = 62.8554, GNorm = 1.0388, lr_0 = 1.2903e-04
Loss = 3.3742e-01, PNorm = 62.8579, GNorm = 1.4273, lr_0 = 1.2894e-04
Loss = 3.5064e-01, PNorm = 62.8594, GNorm = 1.6850, lr_0 = 1.2885e-04
Loss = 3.3414e-01, PNorm = 62.8603, GNorm = 1.3975, lr_0 = 1.2876e-04
Loss = 3.8983e-01, PNorm = 62.8621, GNorm = 1.3345, lr_0 = 1.2867e-04
Loss = 3.8705e-01, PNorm = 62.8631, GNorm = 1.0590, lr_0 = 1.2859e-04
Loss = 3.2025e-01, PNorm = 62.8639, GNorm = 1.9325, lr_0 = 1.2850e-04
Loss = 3.6549e-01, PNorm = 62.8646, GNorm = 1.5663, lr_0 = 1.2841e-04
Loss = 3.2410e-01, PNorm = 62.8660, GNorm = 1.4201, lr_0 = 1.2832e-04
Loss = 3.4524e-01, PNorm = 62.8667, GNorm = 1.4115, lr_0 = 1.2823e-04
Loss = 3.2803e-01, PNorm = 62.8676, GNorm = 1.1689, lr_0 = 1.2815e-04
Loss = 3.2472e-01, PNorm = 62.8685, GNorm = 1.2712, lr_0 = 1.2806e-04
Loss = 3.6129e-01, PNorm = 62.8692, GNorm = 1.4205, lr_0 = 1.2797e-04
Validation mae = 0.110858
Epoch 27
Loss = 3.4358e-01, PNorm = 62.8700, GNorm = 2.1955, lr_0 = 1.2788e-04
Loss = 3.4847e-01, PNorm = 62.8731, GNorm = 1.6337, lr_0 = 1.2780e-04
Loss = 3.7555e-01, PNorm = 62.8752, GNorm = 2.7984, lr_0 = 1.2771e-04
Loss = 3.6900e-01, PNorm = 62.8739, GNorm = 2.1169, lr_0 = 1.2762e-04
Loss = 3.4417e-01, PNorm = 62.8746, GNorm = 1.1501, lr_0 = 1.2753e-04
Loss = 3.5210e-01, PNorm = 62.8757, GNorm = 1.5385, lr_0 = 1.2745e-04
Loss = 3.2771e-01, PNorm = 62.8785, GNorm = 1.6759, lr_0 = 1.2736e-04
Loss = 3.3763e-01, PNorm = 62.8790, GNorm = 1.8259, lr_0 = 1.2727e-04
Loss = 3.5141e-01, PNorm = 62.8807, GNorm = 1.3496, lr_0 = 1.2718e-04
Loss = 3.3763e-01, PNorm = 62.8838, GNorm = 1.4893, lr_0 = 1.2710e-04
Loss = 3.1964e-01, PNorm = 62.8846, GNorm = 1.4469, lr_0 = 1.2701e-04
Loss = 3.6574e-01, PNorm = 62.8850, GNorm = 2.1941, lr_0 = 1.2692e-04
Loss = 3.3643e-01, PNorm = 62.8855, GNorm = 1.3304, lr_0 = 1.2684e-04
Loss = 3.8868e-01, PNorm = 62.8859, GNorm = 1.6582, lr_0 = 1.2675e-04
Loss = 3.2940e-01, PNorm = 62.8877, GNorm = 2.0766, lr_0 = 1.2666e-04
Loss = 3.7469e-01, PNorm = 62.8876, GNorm = 1.9156, lr_0 = 1.2658e-04
Loss = 3.6416e-01, PNorm = 62.8886, GNorm = 1.4132, lr_0 = 1.2649e-04
Loss = 3.6205e-01, PNorm = 62.8907, GNorm = 1.8261, lr_0 = 1.2640e-04
Loss = 2.9148e-01, PNorm = 62.8921, GNorm = 0.9987, lr_0 = 1.2632e-04
Loss = 3.5922e-01, PNorm = 62.8906, GNorm = 1.2386, lr_0 = 1.2623e-04
Loss = 4.0235e-01, PNorm = 62.8911, GNorm = 1.4396, lr_0 = 1.2614e-04
Loss = 3.5149e-01, PNorm = 62.8931, GNorm = 1.1323, lr_0 = 1.2606e-04
Loss = 3.2042e-01, PNorm = 62.8939, GNorm = 1.6010, lr_0 = 1.2597e-04
Loss = 2.8455e-01, PNorm = 62.8967, GNorm = 1.3091, lr_0 = 1.2588e-04
Loss = 3.5835e-01, PNorm = 62.8963, GNorm = 2.0760, lr_0 = 1.2580e-04
Loss = 3.0076e-01, PNorm = 62.8966, GNorm = 1.6081, lr_0 = 1.2571e-04
Loss = 3.2499e-01, PNorm = 62.8990, GNorm = 1.8007, lr_0 = 1.2563e-04
Loss = 3.4713e-01, PNorm = 62.8991, GNorm = 1.3623, lr_0 = 1.2554e-04
Loss = 2.9932e-01, PNorm = 62.8995, GNorm = 1.8729, lr_0 = 1.2545e-04
Loss = 3.7524e-01, PNorm = 62.9004, GNorm = 1.1259, lr_0 = 1.2537e-04
Loss = 3.2998e-01, PNorm = 62.9006, GNorm = 1.6963, lr_0 = 1.2528e-04
Loss = 3.2614e-01, PNorm = 62.9012, GNorm = 1.4982, lr_0 = 1.2520e-04
Loss = 3.2379e-01, PNorm = 62.9031, GNorm = 1.7699, lr_0 = 1.2511e-04
Loss = 3.3531e-01, PNorm = 62.9049, GNorm = 1.1594, lr_0 = 1.2502e-04
Loss = 3.2941e-01, PNorm = 62.9066, GNorm = 1.4739, lr_0 = 1.2494e-04
Loss = 3.8032e-01, PNorm = 62.9077, GNorm = 2.3594, lr_0 = 1.2485e-04
Loss = 3.1947e-01, PNorm = 62.9083, GNorm = 1.4873, lr_0 = 1.2477e-04
Loss = 3.7271e-01, PNorm = 62.9076, GNorm = 1.7523, lr_0 = 1.2468e-04
Loss = 3.2794e-01, PNorm = 62.9073, GNorm = 1.7590, lr_0 = 1.2460e-04
Loss = 3.0455e-01, PNorm = 62.9072, GNorm = 1.8732, lr_0 = 1.2451e-04
Loss = 3.2131e-01, PNorm = 62.9081, GNorm = 1.5672, lr_0 = 1.2443e-04
Loss = 3.7433e-01, PNorm = 62.9093, GNorm = 1.1685, lr_0 = 1.2434e-04
Loss = 3.1177e-01, PNorm = 62.9127, GNorm = 1.3488, lr_0 = 1.2426e-04
Loss = 3.5174e-01, PNorm = 62.9147, GNorm = 1.4365, lr_0 = 1.2417e-04
Loss = 3.5062e-01, PNorm = 62.9167, GNorm = 1.6568, lr_0 = 1.2409e-04
Loss = 3.8247e-01, PNorm = 62.9176, GNorm = 1.5673, lr_0 = 1.2400e-04
Loss = 3.6556e-01, PNorm = 62.9183, GNorm = 2.2948, lr_0 = 1.2392e-04
Loss = 3.2575e-01, PNorm = 62.9185, GNorm = 1.4059, lr_0 = 1.2383e-04
Loss = 3.1552e-01, PNorm = 62.9199, GNorm = 1.3119, lr_0 = 1.2375e-04
Loss = 3.5709e-01, PNorm = 62.9229, GNorm = 1.3292, lr_0 = 1.2366e-04
Loss = 4.3132e-01, PNorm = 62.9250, GNorm = 1.4332, lr_0 = 1.2358e-04
Loss = 3.3776e-01, PNorm = 62.9266, GNorm = 1.6573, lr_0 = 1.2349e-04
Loss = 3.7794e-01, PNorm = 62.9275, GNorm = 1.5805, lr_0 = 1.2341e-04
Loss = 3.2038e-01, PNorm = 62.9282, GNorm = 1.9892, lr_0 = 1.2332e-04
Loss = 3.1398e-01, PNorm = 62.9307, GNorm = 1.4563, lr_0 = 1.2324e-04
Loss = 3.5129e-01, PNorm = 62.9318, GNorm = 1.5202, lr_0 = 1.2315e-04
Loss = 3.4521e-01, PNorm = 62.9323, GNorm = 1.8559, lr_0 = 1.2307e-04
Loss = 3.3058e-01, PNorm = 62.9326, GNorm = 1.4898, lr_0 = 1.2298e-04
Loss = 3.2786e-01, PNorm = 62.9338, GNorm = 1.9424, lr_0 = 1.2290e-04
Loss = 3.1724e-01, PNorm = 62.9354, GNorm = 1.1149, lr_0 = 1.2282e-04
Loss = 3.4268e-01, PNorm = 62.9358, GNorm = 1.8460, lr_0 = 1.2273e-04
Loss = 3.5541e-01, PNorm = 62.9375, GNorm = 2.2394, lr_0 = 1.2265e-04
Loss = 3.6836e-01, PNorm = 62.9394, GNorm = 1.2420, lr_0 = 1.2256e-04
Loss = 2.9508e-01, PNorm = 62.9408, GNorm = 1.1249, lr_0 = 1.2248e-04
Loss = 4.1299e-01, PNorm = 62.9402, GNorm = 1.3247, lr_0 = 1.2240e-04
Loss = 3.2291e-01, PNorm = 62.9419, GNorm = 1.2157, lr_0 = 1.2231e-04
Loss = 3.5933e-01, PNorm = 62.9448, GNorm = 1.9022, lr_0 = 1.2223e-04
Loss = 3.2945e-01, PNorm = 62.9456, GNorm = 1.5805, lr_0 = 1.2214e-04
Loss = 3.1688e-01, PNorm = 62.9448, GNorm = 1.5122, lr_0 = 1.2206e-04
Loss = 3.2123e-01, PNorm = 62.9460, GNorm = 2.1858, lr_0 = 1.2198e-04
Loss = 2.9955e-01, PNorm = 62.9481, GNorm = 1.2414, lr_0 = 1.2189e-04
Loss = 3.9851e-01, PNorm = 62.9470, GNorm = 2.3140, lr_0 = 1.2181e-04
Loss = 3.1983e-01, PNorm = 62.9474, GNorm = 1.5343, lr_0 = 1.2173e-04
Loss = 3.3406e-01, PNorm = 62.9478, GNorm = 1.3285, lr_0 = 1.2164e-04
Loss = 3.5445e-01, PNorm = 62.9476, GNorm = 1.1059, lr_0 = 1.2156e-04
Loss = 3.3063e-01, PNorm = 62.9484, GNorm = 2.3645, lr_0 = 1.2148e-04
Loss = 3.6387e-01, PNorm = 62.9516, GNorm = 1.3117, lr_0 = 1.2139e-04
Loss = 3.4338e-01, PNorm = 62.9521, GNorm = 1.1940, lr_0 = 1.2131e-04
Loss = 3.5736e-01, PNorm = 62.9536, GNorm = 1.5470, lr_0 = 1.2123e-04
Loss = 3.4925e-01, PNorm = 62.9549, GNorm = 1.4487, lr_0 = 1.2114e-04
Loss = 3.1628e-01, PNorm = 62.9559, GNorm = 1.3758, lr_0 = 1.2106e-04
Loss = 3.3733e-01, PNorm = 62.9550, GNorm = 1.3992, lr_0 = 1.2098e-04
Loss = 3.9897e-01, PNorm = 62.9546, GNorm = 1.9500, lr_0 = 1.2090e-04
Loss = 3.7818e-01, PNorm = 62.9559, GNorm = 1.5054, lr_0 = 1.2081e-04
Loss = 3.8928e-01, PNorm = 62.9546, GNorm = 1.7704, lr_0 = 1.2073e-04
Loss = 3.3340e-01, PNorm = 62.9554, GNorm = 1.2881, lr_0 = 1.2065e-04
Loss = 3.4574e-01, PNorm = 62.9567, GNorm = 1.2676, lr_0 = 1.2056e-04
Loss = 3.4006e-01, PNorm = 62.9588, GNorm = 1.8340, lr_0 = 1.2048e-04
Loss = 3.5420e-01, PNorm = 62.9594, GNorm = 1.6504, lr_0 = 1.2040e-04
Loss = 3.3699e-01, PNorm = 62.9610, GNorm = 1.4138, lr_0 = 1.2032e-04
Loss = 3.6354e-01, PNorm = 62.9624, GNorm = 1.6089, lr_0 = 1.2023e-04
Loss = 3.6845e-01, PNorm = 62.9616, GNorm = 2.2285, lr_0 = 1.2015e-04
Loss = 3.9534e-01, PNorm = 62.9631, GNorm = 1.9175, lr_0 = 1.2007e-04
Loss = 3.3511e-01, PNorm = 62.9642, GNorm = 1.5942, lr_0 = 1.1999e-04
Loss = 3.4005e-01, PNorm = 62.9647, GNorm = 1.3389, lr_0 = 1.1991e-04
Loss = 3.2777e-01, PNorm = 62.9669, GNorm = 1.5642, lr_0 = 1.1982e-04
Loss = 3.3984e-01, PNorm = 62.9684, GNorm = 1.2820, lr_0 = 1.1974e-04
Loss = 3.7383e-01, PNorm = 62.9686, GNorm = 1.4134, lr_0 = 1.1966e-04
Loss = 3.2171e-01, PNorm = 62.9676, GNorm = 1.4888, lr_0 = 1.1958e-04
Loss = 3.2862e-01, PNorm = 62.9681, GNorm = 1.5240, lr_0 = 1.1950e-04
Loss = 3.3171e-01, PNorm = 62.9698, GNorm = 1.7966, lr_0 = 1.1941e-04
Loss = 3.4391e-01, PNorm = 62.9711, GNorm = 1.6370, lr_0 = 1.1933e-04
Loss = 4.0066e-01, PNorm = 62.9712, GNorm = 1.5152, lr_0 = 1.1925e-04
Loss = 3.1504e-01, PNorm = 62.9722, GNorm = 1.2574, lr_0 = 1.1917e-04
Loss = 3.4860e-01, PNorm = 62.9736, GNorm = 1.3012, lr_0 = 1.1909e-04
Loss = 3.6105e-01, PNorm = 62.9760, GNorm = 1.2882, lr_0 = 1.1901e-04
Loss = 3.5222e-01, PNorm = 62.9753, GNorm = 1.3354, lr_0 = 1.1892e-04
Loss = 3.3260e-01, PNorm = 62.9754, GNorm = 1.8802, lr_0 = 1.1884e-04
Loss = 3.9300e-01, PNorm = 62.9762, GNorm = 1.3812, lr_0 = 1.1876e-04
Loss = 3.7274e-01, PNorm = 62.9790, GNorm = 2.3816, lr_0 = 1.1868e-04
Loss = 3.1528e-01, PNorm = 62.9804, GNorm = 1.3897, lr_0 = 1.1860e-04
Loss = 3.4037e-01, PNorm = 62.9798, GNorm = 1.8559, lr_0 = 1.1852e-04
Loss = 3.4887e-01, PNorm = 62.9802, GNorm = 1.3129, lr_0 = 1.1844e-04
Loss = 3.8198e-01, PNorm = 62.9815, GNorm = 1.1719, lr_0 = 1.1835e-04
Loss = 3.4231e-01, PNorm = 62.9822, GNorm = 1.3563, lr_0 = 1.1827e-04
Loss = 3.8349e-01, PNorm = 62.9823, GNorm = 1.9015, lr_0 = 1.1819e-04
Loss = 3.3964e-01, PNorm = 62.9827, GNorm = 0.9929, lr_0 = 1.1811e-04
Loss = 3.8213e-01, PNorm = 62.9846, GNorm = 1.1532, lr_0 = 1.1803e-04
Loss = 3.1272e-01, PNorm = 62.9857, GNorm = 1.2823, lr_0 = 1.1795e-04
Loss = 3.2649e-01, PNorm = 62.9853, GNorm = 1.2917, lr_0 = 1.1787e-04
Validation mae = 0.111034
Epoch 28
Loss = 3.4867e-01, PNorm = 62.9860, GNorm = 1.2890, lr_0 = 1.1779e-04
Loss = 3.3407e-01, PNorm = 62.9867, GNorm = 1.6239, lr_0 = 1.1771e-04
Loss = 3.8140e-01, PNorm = 62.9894, GNorm = 1.9584, lr_0 = 1.1763e-04
Loss = 3.8966e-01, PNorm = 62.9921, GNorm = 2.0549, lr_0 = 1.1755e-04
Loss = 4.0256e-01, PNorm = 62.9924, GNorm = 2.7536, lr_0 = 1.1747e-04
Loss = 3.2786e-01, PNorm = 62.9922, GNorm = 1.6643, lr_0 = 1.1739e-04
Loss = 3.3811e-01, PNorm = 62.9923, GNorm = 1.4211, lr_0 = 1.1730e-04
Loss = 3.4433e-01, PNorm = 62.9941, GNorm = 0.9951, lr_0 = 1.1722e-04
Loss = 3.4453e-01, PNorm = 62.9942, GNorm = 1.2040, lr_0 = 1.1714e-04
Loss = 3.0703e-01, PNorm = 62.9956, GNorm = 1.1380, lr_0 = 1.1706e-04
Loss = 3.6859e-01, PNorm = 62.9974, GNorm = 1.9875, lr_0 = 1.1698e-04
Loss = 3.4364e-01, PNorm = 62.9990, GNorm = 1.6283, lr_0 = 1.1690e-04
Loss = 3.5493e-01, PNorm = 62.9996, GNorm = 1.0774, lr_0 = 1.1682e-04
Loss = 3.1797e-01, PNorm = 63.0012, GNorm = 1.5125, lr_0 = 1.1674e-04
Loss = 3.6876e-01, PNorm = 63.0016, GNorm = 1.7167, lr_0 = 1.1666e-04
Loss = 4.0861e-01, PNorm = 63.0000, GNorm = 1.6179, lr_0 = 1.1658e-04
Loss = 3.5452e-01, PNorm = 63.0000, GNorm = 1.4272, lr_0 = 1.1650e-04
Loss = 3.4330e-01, PNorm = 63.0011, GNorm = 1.6301, lr_0 = 1.1642e-04
Loss = 3.5208e-01, PNorm = 63.0028, GNorm = 1.9198, lr_0 = 1.1634e-04
Loss = 3.5097e-01, PNorm = 63.0049, GNorm = 1.5960, lr_0 = 1.1626e-04
Loss = 3.3270e-01, PNorm = 63.0072, GNorm = 1.3540, lr_0 = 1.1618e-04
Loss = 3.3783e-01, PNorm = 63.0074, GNorm = 1.6885, lr_0 = 1.1611e-04
Loss = 2.9511e-01, PNorm = 63.0084, GNorm = 1.2765, lr_0 = 1.1603e-04
Loss = 3.4185e-01, PNorm = 63.0080, GNorm = 1.3792, lr_0 = 1.1595e-04
Loss = 3.5926e-01, PNorm = 63.0096, GNorm = 1.3974, lr_0 = 1.1587e-04
Loss = 3.0366e-01, PNorm = 63.0122, GNorm = 1.4282, lr_0 = 1.1579e-04
Loss = 3.9621e-01, PNorm = 63.0149, GNorm = 1.8522, lr_0 = 1.1571e-04
Loss = 3.1762e-01, PNorm = 63.0163, GNorm = 1.7290, lr_0 = 1.1563e-04
Loss = 3.7416e-01, PNorm = 63.0154, GNorm = 2.3047, lr_0 = 1.1555e-04
Loss = 3.5500e-01, PNorm = 63.0152, GNorm = 1.6071, lr_0 = 1.1547e-04
Loss = 3.5296e-01, PNorm = 63.0171, GNorm = 1.5813, lr_0 = 1.1539e-04
Loss = 3.6906e-01, PNorm = 63.0169, GNorm = 1.6949, lr_0 = 1.1531e-04
Loss = 3.4673e-01, PNorm = 63.0176, GNorm = 1.0737, lr_0 = 1.1523e-04
Loss = 2.8301e-01, PNorm = 63.0195, GNorm = 1.3161, lr_0 = 1.1515e-04
Loss = 3.2877e-01, PNorm = 63.0194, GNorm = 1.7954, lr_0 = 1.1508e-04
Loss = 3.8102e-01, PNorm = 63.0198, GNorm = 2.0484, lr_0 = 1.1500e-04
Loss = 3.3907e-01, PNorm = 63.0222, GNorm = 1.3814, lr_0 = 1.1492e-04
Loss = 3.6345e-01, PNorm = 63.0237, GNorm = 1.6607, lr_0 = 1.1484e-04
Loss = 3.4045e-01, PNorm = 63.0252, GNorm = 1.2832, lr_0 = 1.1476e-04
Loss = 3.0085e-01, PNorm = 63.0259, GNorm = 1.1797, lr_0 = 1.1468e-04
Loss = 3.5512e-01, PNorm = 63.0275, GNorm = 1.9844, lr_0 = 1.1460e-04
Loss = 3.4398e-01, PNorm = 63.0290, GNorm = 1.4951, lr_0 = 1.1452e-04
Loss = 3.4744e-01, PNorm = 63.0312, GNorm = 1.6509, lr_0 = 1.1445e-04
Loss = 3.4572e-01, PNorm = 63.0315, GNorm = 1.6255, lr_0 = 1.1437e-04
Loss = 2.7523e-01, PNorm = 63.0326, GNorm = 1.9660, lr_0 = 1.1429e-04
Loss = 3.2779e-01, PNorm = 63.0317, GNorm = 1.5163, lr_0 = 1.1421e-04
Loss = 3.4481e-01, PNorm = 63.0336, GNorm = 1.2591, lr_0 = 1.1413e-04
Loss = 3.2940e-01, PNorm = 63.0356, GNorm = 1.7825, lr_0 = 1.1405e-04
Loss = 3.4660e-01, PNorm = 63.0361, GNorm = 1.6267, lr_0 = 1.1398e-04
Loss = 3.5903e-01, PNorm = 63.0380, GNorm = 1.1787, lr_0 = 1.1390e-04
Loss = 3.6436e-01, PNorm = 63.0383, GNorm = 1.5879, lr_0 = 1.1382e-04
Loss = 3.5176e-01, PNorm = 63.0374, GNorm = 1.5638, lr_0 = 1.1374e-04
Loss = 3.4063e-01, PNorm = 63.0388, GNorm = 1.6206, lr_0 = 1.1366e-04
Loss = 3.2899e-01, PNorm = 63.0391, GNorm = 1.2302, lr_0 = 1.1359e-04
Loss = 3.7500e-01, PNorm = 63.0396, GNorm = 1.7189, lr_0 = 1.1351e-04
Loss = 3.0629e-01, PNorm = 63.0405, GNorm = 1.3745, lr_0 = 1.1343e-04
Loss = 3.6433e-01, PNorm = 63.0408, GNorm = 1.0141, lr_0 = 1.1335e-04
Loss = 3.3798e-01, PNorm = 63.0399, GNorm = 1.3667, lr_0 = 1.1328e-04
Loss = 3.6187e-01, PNorm = 63.0386, GNorm = 1.5804, lr_0 = 1.1320e-04
Loss = 3.5117e-01, PNorm = 63.0404, GNorm = 1.5798, lr_0 = 1.1312e-04
Loss = 3.2394e-01, PNorm = 63.0433, GNorm = 2.0326, lr_0 = 1.1304e-04
Loss = 3.1242e-01, PNorm = 63.0449, GNorm = 1.5506, lr_0 = 1.1297e-04
Loss = 3.6672e-01, PNorm = 63.0467, GNorm = 1.3689, lr_0 = 1.1289e-04
Loss = 3.3885e-01, PNorm = 63.0473, GNorm = 1.5273, lr_0 = 1.1281e-04
Loss = 3.7641e-01, PNorm = 63.0476, GNorm = 1.3025, lr_0 = 1.1273e-04
Loss = 3.2182e-01, PNorm = 63.0487, GNorm = 1.6588, lr_0 = 1.1266e-04
Loss = 3.6067e-01, PNorm = 63.0493, GNorm = 1.2385, lr_0 = 1.1258e-04
Loss = 3.4502e-01, PNorm = 63.0498, GNorm = 1.4888, lr_0 = 1.1250e-04
Loss = 2.9572e-01, PNorm = 63.0515, GNorm = 1.4903, lr_0 = 1.1243e-04
Loss = 3.4822e-01, PNorm = 63.0536, GNorm = 2.0608, lr_0 = 1.1235e-04
Loss = 3.7558e-01, PNorm = 63.0555, GNorm = 1.3159, lr_0 = 1.1227e-04
Loss = 3.3410e-01, PNorm = 63.0578, GNorm = 1.1935, lr_0 = 1.1219e-04
Loss = 3.4114e-01, PNorm = 63.0583, GNorm = 1.5373, lr_0 = 1.1212e-04
Loss = 3.2914e-01, PNorm = 63.0580, GNorm = 1.3223, lr_0 = 1.1204e-04
Loss = 3.5215e-01, PNorm = 63.0591, GNorm = 2.4189, lr_0 = 1.1196e-04
Loss = 3.7161e-01, PNorm = 63.0590, GNorm = 1.1764, lr_0 = 1.1189e-04
Loss = 3.5913e-01, PNorm = 63.0600, GNorm = 1.2398, lr_0 = 1.1181e-04
Loss = 3.4957e-01, PNorm = 63.0615, GNorm = 1.2570, lr_0 = 1.1173e-04
Loss = 3.2803e-01, PNorm = 63.0646, GNorm = 1.2940, lr_0 = 1.1166e-04
Loss = 2.8710e-01, PNorm = 63.0664, GNorm = 1.0859, lr_0 = 1.1158e-04
Loss = 3.5247e-01, PNorm = 63.0668, GNorm = 1.2667, lr_0 = 1.1150e-04
Loss = 3.4892e-01, PNorm = 63.0670, GNorm = 1.7097, lr_0 = 1.1143e-04
Loss = 3.5546e-01, PNorm = 63.0678, GNorm = 1.5667, lr_0 = 1.1135e-04
Loss = 3.5078e-01, PNorm = 63.0689, GNorm = 1.2787, lr_0 = 1.1128e-04
Loss = 3.6189e-01, PNorm = 63.0690, GNorm = 2.3836, lr_0 = 1.1120e-04
Loss = 3.4417e-01, PNorm = 63.0714, GNorm = 0.8686, lr_0 = 1.1112e-04
Loss = 3.0185e-01, PNorm = 63.0735, GNorm = 1.8615, lr_0 = 1.1105e-04
Loss = 3.5315e-01, PNorm = 63.0731, GNorm = 2.4366, lr_0 = 1.1097e-04
Loss = 3.4685e-01, PNorm = 63.0742, GNorm = 1.8580, lr_0 = 1.1089e-04
Loss = 3.2707e-01, PNorm = 63.0753, GNorm = 1.5110, lr_0 = 1.1082e-04
Loss = 2.9889e-01, PNorm = 63.0760, GNorm = 1.2805, lr_0 = 1.1074e-04
Loss = 2.9592e-01, PNorm = 63.0764, GNorm = 1.6686, lr_0 = 1.1067e-04
Loss = 3.7632e-01, PNorm = 63.0772, GNorm = 1.5811, lr_0 = 1.1059e-04
Loss = 3.3856e-01, PNorm = 63.0788, GNorm = 1.7617, lr_0 = 1.1052e-04
Loss = 3.3110e-01, PNorm = 63.0813, GNorm = 1.3673, lr_0 = 1.1044e-04
Loss = 3.1536e-01, PNorm = 63.0811, GNorm = 1.1142, lr_0 = 1.1036e-04
Loss = 3.1481e-01, PNorm = 63.0809, GNorm = 1.1965, lr_0 = 1.1029e-04
Loss = 3.5269e-01, PNorm = 63.0833, GNorm = 1.4821, lr_0 = 1.1021e-04
Loss = 3.5779e-01, PNorm = 63.0845, GNorm = 1.3604, lr_0 = 1.1014e-04
Loss = 3.6236e-01, PNorm = 63.0854, GNorm = 1.9421, lr_0 = 1.1006e-04
Loss = 3.6087e-01, PNorm = 63.0847, GNorm = 1.6514, lr_0 = 1.0999e-04
Loss = 3.0800e-01, PNorm = 63.0854, GNorm = 1.2678, lr_0 = 1.0991e-04
Loss = 3.5918e-01, PNorm = 63.0868, GNorm = 2.1384, lr_0 = 1.0984e-04
Loss = 2.9765e-01, PNorm = 63.0866, GNorm = 1.5823, lr_0 = 1.0976e-04
Loss = 3.4097e-01, PNorm = 63.0866, GNorm = 1.6398, lr_0 = 1.0969e-04
Loss = 3.0049e-01, PNorm = 63.0884, GNorm = 1.5408, lr_0 = 1.0961e-04
Loss = 3.2232e-01, PNorm = 63.0902, GNorm = 1.3840, lr_0 = 1.0954e-04
Loss = 3.1607e-01, PNorm = 63.0912, GNorm = 1.2231, lr_0 = 1.0946e-04
Loss = 3.7813e-01, PNorm = 63.0928, GNorm = 1.2039, lr_0 = 1.0939e-04
Loss = 3.4994e-01, PNorm = 63.0931, GNorm = 1.1565, lr_0 = 1.0931e-04
Loss = 3.8905e-01, PNorm = 63.0941, GNorm = 1.4320, lr_0 = 1.0924e-04
Loss = 3.2442e-01, PNorm = 63.0941, GNorm = 1.2767, lr_0 = 1.0916e-04
Loss = 3.4653e-01, PNorm = 63.0948, GNorm = 1.3041, lr_0 = 1.0909e-04
Loss = 3.7275e-01, PNorm = 63.0969, GNorm = 1.1099, lr_0 = 1.0901e-04
Loss = 3.6403e-01, PNorm = 63.0973, GNorm = 2.1589, lr_0 = 1.0894e-04
Loss = 3.6198e-01, PNorm = 63.0971, GNorm = 1.7351, lr_0 = 1.0886e-04
Loss = 3.5983e-01, PNorm = 63.0981, GNorm = 1.6870, lr_0 = 1.0879e-04
Loss = 3.1962e-01, PNorm = 63.0988, GNorm = 1.5385, lr_0 = 1.0871e-04
Loss = 4.0901e-01, PNorm = 63.0995, GNorm = 1.8996, lr_0 = 1.0864e-04
Loss = 3.7068e-01, PNorm = 63.1005, GNorm = 1.7367, lr_0 = 1.0856e-04
Validation mae = 0.111626
Epoch 29
Loss = 4.2672e-01, PNorm = 63.1011, GNorm = 1.9601, lr_0 = 1.0849e-04
Loss = 3.6472e-01, PNorm = 63.1023, GNorm = 1.5009, lr_0 = 1.0841e-04
Loss = 3.2535e-01, PNorm = 63.1032, GNorm = 1.2356, lr_0 = 1.0834e-04
Loss = 3.3781e-01, PNorm = 63.1037, GNorm = 1.1686, lr_0 = 1.0827e-04
Loss = 3.4383e-01, PNorm = 63.1030, GNorm = 1.7713, lr_0 = 1.0819e-04
Loss = 3.2905e-01, PNorm = 63.1047, GNorm = 1.2583, lr_0 = 1.0812e-04
Loss = 3.5654e-01, PNorm = 63.1062, GNorm = 1.1068, lr_0 = 1.0804e-04
Loss = 3.0761e-01, PNorm = 63.1078, GNorm = 1.3575, lr_0 = 1.0797e-04
Loss = 3.4146e-01, PNorm = 63.1075, GNorm = 1.4056, lr_0 = 1.0790e-04
Loss = 3.4390e-01, PNorm = 63.1079, GNorm = 1.2548, lr_0 = 1.0782e-04
Loss = 3.6033e-01, PNorm = 63.1080, GNorm = 2.1674, lr_0 = 1.0775e-04
Loss = 3.8506e-01, PNorm = 63.1104, GNorm = 2.0009, lr_0 = 1.0767e-04
Loss = 3.3666e-01, PNorm = 63.1113, GNorm = 1.4899, lr_0 = 1.0760e-04
Loss = 3.5089e-01, PNorm = 63.1123, GNorm = 1.4826, lr_0 = 1.0753e-04
Loss = 3.1373e-01, PNorm = 63.1116, GNorm = 1.6881, lr_0 = 1.0745e-04
Loss = 3.0622e-01, PNorm = 63.1112, GNorm = 1.4577, lr_0 = 1.0738e-04
Loss = 3.0698e-01, PNorm = 63.1115, GNorm = 1.5200, lr_0 = 1.0731e-04
Loss = 3.7652e-01, PNorm = 63.1121, GNorm = 1.4681, lr_0 = 1.0723e-04
Loss = 3.0211e-01, PNorm = 63.1134, GNorm = 1.5218, lr_0 = 1.0716e-04
Loss = 3.4437e-01, PNorm = 63.1146, GNorm = 1.4213, lr_0 = 1.0709e-04
Loss = 3.8332e-01, PNorm = 63.1160, GNorm = 1.5454, lr_0 = 1.0701e-04
Loss = 3.2353e-01, PNorm = 63.1168, GNorm = 1.5162, lr_0 = 1.0694e-04
Loss = 3.2931e-01, PNorm = 63.1174, GNorm = 1.7662, lr_0 = 1.0687e-04
Loss = 3.4588e-01, PNorm = 63.1176, GNorm = 1.5045, lr_0 = 1.0679e-04
Loss = 3.6104e-01, PNorm = 63.1172, GNorm = 1.7362, lr_0 = 1.0672e-04
Loss = 3.2995e-01, PNorm = 63.1192, GNorm = 1.6513, lr_0 = 1.0665e-04
Loss = 3.7331e-01, PNorm = 63.1184, GNorm = 1.6300, lr_0 = 1.0657e-04
Loss = 3.4281e-01, PNorm = 63.1186, GNorm = 1.9558, lr_0 = 1.0650e-04
Loss = 3.4598e-01, PNorm = 63.1209, GNorm = 1.1908, lr_0 = 1.0643e-04
Loss = 3.9714e-01, PNorm = 63.1221, GNorm = 1.8483, lr_0 = 1.0635e-04
Loss = 3.0118e-01, PNorm = 63.1228, GNorm = 1.2874, lr_0 = 1.0628e-04
Loss = 3.0869e-01, PNorm = 63.1244, GNorm = 1.3311, lr_0 = 1.0621e-04
Loss = 3.2849e-01, PNorm = 63.1252, GNorm = 1.5782, lr_0 = 1.0614e-04
Loss = 3.3434e-01, PNorm = 63.1261, GNorm = 1.2401, lr_0 = 1.0606e-04
Loss = 3.0593e-01, PNorm = 63.1276, GNorm = 1.1861, lr_0 = 1.0599e-04
Loss = 3.5980e-01, PNorm = 63.1285, GNorm = 1.4218, lr_0 = 1.0592e-04
Loss = 3.2316e-01, PNorm = 63.1288, GNorm = 1.7731, lr_0 = 1.0585e-04
Loss = 3.1093e-01, PNorm = 63.1283, GNorm = 1.7835, lr_0 = 1.0577e-04
Loss = 3.7928e-01, PNorm = 63.1286, GNorm = 2.6023, lr_0 = 1.0570e-04
Loss = 3.5534e-01, PNorm = 63.1304, GNorm = 1.4966, lr_0 = 1.0563e-04
Loss = 3.8433e-01, PNorm = 63.1321, GNorm = 1.3254, lr_0 = 1.0556e-04
Loss = 3.2565e-01, PNorm = 63.1331, GNorm = 1.5052, lr_0 = 1.0548e-04
Loss = 3.4822e-01, PNorm = 63.1334, GNorm = 1.2866, lr_0 = 1.0541e-04
Loss = 3.1590e-01, PNorm = 63.1336, GNorm = 1.3157, lr_0 = 1.0534e-04
Loss = 3.0085e-01, PNorm = 63.1338, GNorm = 1.0796, lr_0 = 1.0527e-04
Loss = 3.7335e-01, PNorm = 63.1355, GNorm = 1.3493, lr_0 = 1.0519e-04
Loss = 3.7654e-01, PNorm = 63.1364, GNorm = 1.3815, lr_0 = 1.0512e-04
Loss = 3.1405e-01, PNorm = 63.1375, GNorm = 1.7897, lr_0 = 1.0505e-04
Loss = 2.9927e-01, PNorm = 63.1390, GNorm = 1.3034, lr_0 = 1.0498e-04
Loss = 3.6690e-01, PNorm = 63.1390, GNorm = 1.6696, lr_0 = 1.0491e-04
Loss = 3.1227e-01, PNorm = 63.1383, GNorm = 1.1802, lr_0 = 1.0483e-04
Loss = 3.4999e-01, PNorm = 63.1395, GNorm = 1.5539, lr_0 = 1.0476e-04
Loss = 4.0370e-01, PNorm = 63.1411, GNorm = 1.8071, lr_0 = 1.0469e-04
Loss = 3.5826e-01, PNorm = 63.1421, GNorm = 1.9989, lr_0 = 1.0462e-04
Loss = 3.1295e-01, PNorm = 63.1425, GNorm = 1.6040, lr_0 = 1.0455e-04
Loss = 3.1969e-01, PNorm = 63.1442, GNorm = 1.4398, lr_0 = 1.0448e-04
Loss = 4.1208e-01, PNorm = 63.1463, GNorm = 1.7500, lr_0 = 1.0440e-04
Loss = 3.5672e-01, PNorm = 63.1470, GNorm = 1.4568, lr_0 = 1.0433e-04
Loss = 4.1107e-01, PNorm = 63.1477, GNorm = 1.7456, lr_0 = 1.0426e-04
Loss = 3.2673e-01, PNorm = 63.1485, GNorm = 1.4178, lr_0 = 1.0419e-04
Loss = 3.7265e-01, PNorm = 63.1492, GNorm = 2.0012, lr_0 = 1.0412e-04
Loss = 3.6229e-01, PNorm = 63.1502, GNorm = 1.5472, lr_0 = 1.0405e-04
Loss = 3.1331e-01, PNorm = 63.1520, GNorm = 1.5128, lr_0 = 1.0398e-04
Loss = 3.4973e-01, PNorm = 63.1532, GNorm = 1.0722, lr_0 = 1.0391e-04
Loss = 3.1144e-01, PNorm = 63.1534, GNorm = 1.7204, lr_0 = 1.0383e-04
Loss = 3.4599e-01, PNorm = 63.1533, GNorm = 1.4051, lr_0 = 1.0376e-04
Loss = 3.1892e-01, PNorm = 63.1547, GNorm = 1.2421, lr_0 = 1.0369e-04
Loss = 3.3508e-01, PNorm = 63.1557, GNorm = 1.1682, lr_0 = 1.0362e-04
Loss = 3.2312e-01, PNorm = 63.1572, GNorm = 1.7276, lr_0 = 1.0355e-04
Loss = 3.8880e-01, PNorm = 63.1584, GNorm = 2.0063, lr_0 = 1.0348e-04
Loss = 3.8375e-01, PNorm = 63.1597, GNorm = 1.1835, lr_0 = 1.0341e-04
Loss = 3.2916e-01, PNorm = 63.1619, GNorm = 1.7260, lr_0 = 1.0334e-04
Loss = 3.1792e-01, PNorm = 63.1633, GNorm = 1.5358, lr_0 = 1.0327e-04
Loss = 3.6949e-01, PNorm = 63.1638, GNorm = 1.8491, lr_0 = 1.0320e-04
Loss = 3.2453e-01, PNorm = 63.1638, GNorm = 1.3484, lr_0 = 1.0312e-04
Loss = 2.7986e-01, PNorm = 63.1636, GNorm = 1.2970, lr_0 = 1.0305e-04
Loss = 3.2185e-01, PNorm = 63.1624, GNorm = 1.6284, lr_0 = 1.0298e-04
Loss = 3.9502e-01, PNorm = 63.1638, GNorm = 2.3386, lr_0 = 1.0291e-04
Loss = 3.9581e-01, PNorm = 63.1659, GNorm = 1.4037, lr_0 = 1.0284e-04
Loss = 3.3915e-01, PNorm = 63.1656, GNorm = 1.0604, lr_0 = 1.0277e-04
Loss = 3.2736e-01, PNorm = 63.1664, GNorm = 1.4637, lr_0 = 1.0270e-04
Loss = 3.3172e-01, PNorm = 63.1678, GNorm = 1.4885, lr_0 = 1.0263e-04
Loss = 3.2100e-01, PNorm = 63.1678, GNorm = 1.5398, lr_0 = 1.0256e-04
Loss = 3.4083e-01, PNorm = 63.1678, GNorm = 0.9780, lr_0 = 1.0249e-04
Loss = 3.4356e-01, PNorm = 63.1692, GNorm = 1.9163, lr_0 = 1.0242e-04
Loss = 3.0985e-01, PNorm = 63.1691, GNorm = 1.5108, lr_0 = 1.0235e-04
Loss = 3.5128e-01, PNorm = 63.1691, GNorm = 1.2217, lr_0 = 1.0228e-04
Loss = 3.2385e-01, PNorm = 63.1694, GNorm = 1.7301, lr_0 = 1.0221e-04
Loss = 4.1352e-01, PNorm = 63.1702, GNorm = 2.2334, lr_0 = 1.0214e-04
Loss = 3.3267e-01, PNorm = 63.1711, GNorm = 1.2907, lr_0 = 1.0207e-04
Loss = 3.2389e-01, PNorm = 63.1728, GNorm = 1.6986, lr_0 = 1.0200e-04
Loss = 3.5641e-01, PNorm = 63.1735, GNorm = 2.0717, lr_0 = 1.0193e-04
Loss = 3.5369e-01, PNorm = 63.1746, GNorm = 1.2346, lr_0 = 1.0186e-04
Loss = 3.4189e-01, PNorm = 63.1755, GNorm = 1.4644, lr_0 = 1.0179e-04
Loss = 3.4176e-01, PNorm = 63.1751, GNorm = 1.2206, lr_0 = 1.0172e-04
Loss = 3.6283e-01, PNorm = 63.1760, GNorm = 1.4543, lr_0 = 1.0165e-04
Loss = 3.3442e-01, PNorm = 63.1770, GNorm = 1.4876, lr_0 = 1.0158e-04
Loss = 3.5214e-01, PNorm = 63.1786, GNorm = 1.9219, lr_0 = 1.0151e-04
Loss = 4.0466e-01, PNorm = 63.1783, GNorm = 1.4760, lr_0 = 1.0144e-04
Loss = 3.0229e-01, PNorm = 63.1785, GNorm = 1.2085, lr_0 = 1.0137e-04
Loss = 3.2659e-01, PNorm = 63.1799, GNorm = 1.5642, lr_0 = 1.0130e-04
Loss = 3.1804e-01, PNorm = 63.1794, GNorm = 1.7107, lr_0 = 1.0123e-04
Loss = 3.8239e-01, PNorm = 63.1801, GNorm = 1.5934, lr_0 = 1.0116e-04
Loss = 3.2960e-01, PNorm = 63.1815, GNorm = 1.2589, lr_0 = 1.0110e-04
Loss = 3.5535e-01, PNorm = 63.1816, GNorm = 1.8061, lr_0 = 1.0103e-04
Loss = 2.9248e-01, PNorm = 63.1818, GNorm = 1.8251, lr_0 = 1.0096e-04
Loss = 3.6486e-01, PNorm = 63.1835, GNorm = 2.0916, lr_0 = 1.0089e-04
Loss = 3.5954e-01, PNorm = 63.1856, GNorm = 1.2166, lr_0 = 1.0082e-04
Loss = 3.1607e-01, PNorm = 63.1861, GNorm = 1.6786, lr_0 = 1.0075e-04
Loss = 3.5679e-01, PNorm = 63.1876, GNorm = 2.2347, lr_0 = 1.0068e-04
Loss = 3.3452e-01, PNorm = 63.1890, GNorm = 1.9562, lr_0 = 1.0061e-04
Loss = 3.0038e-01, PNorm = 63.1894, GNorm = 1.3984, lr_0 = 1.0054e-04
Loss = 3.3418e-01, PNorm = 63.1890, GNorm = 1.6648, lr_0 = 1.0047e-04
Loss = 3.1772e-01, PNorm = 63.1895, GNorm = 1.1428, lr_0 = 1.0041e-04
Loss = 3.0835e-01, PNorm = 63.1903, GNorm = 1.6282, lr_0 = 1.0034e-04
Loss = 3.0512e-01, PNorm = 63.1910, GNorm = 1.7875, lr_0 = 1.0027e-04
Loss = 3.1442e-01, PNorm = 63.1922, GNorm = 1.5082, lr_0 = 1.0020e-04
Loss = 3.4963e-01, PNorm = 63.1932, GNorm = 1.8186, lr_0 = 1.0013e-04
Loss = 3.1238e-01, PNorm = 63.1949, GNorm = 1.3767, lr_0 = 1.0006e-04
Loss = 3.6599e-01, PNorm = 63.1950, GNorm = 1.7418, lr_0 = 1.0000e-04
Validation mae = 0.111121
Model 0 best validation mae = 0.110481 on epoch 25
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110720
Ensemble test mae = 0.110720
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0107e+00, PNorm = 38.1708, GNorm = 3.5633, lr_0 = 1.0413e-04
Loss = 9.7242e-01, PNorm = 38.1706, GNorm = 4.2768, lr_0 = 1.0788e-04
Loss = 1.0388e+00, PNorm = 38.1702, GNorm = 2.7116, lr_0 = 1.1163e-04
Loss = 9.4620e-01, PNorm = 38.1700, GNorm = 2.9648, lr_0 = 1.1537e-04
Loss = 1.0182e+00, PNorm = 38.1706, GNorm = 2.0382, lr_0 = 1.1913e-04
Loss = 9.2013e-01, PNorm = 38.1709, GNorm = 2.3040, lr_0 = 1.2287e-04
Loss = 9.7582e-01, PNorm = 38.1718, GNorm = 1.8837, lr_0 = 1.2663e-04
Loss = 8.8800e-01, PNorm = 38.1726, GNorm = 2.5267, lr_0 = 1.3038e-04
Loss = 9.9384e-01, PNorm = 38.1739, GNorm = 4.3494, lr_0 = 1.3413e-04
Loss = 9.2832e-01, PNorm = 38.1743, GNorm = 3.1808, lr_0 = 1.3788e-04
Loss = 8.7706e-01, PNorm = 38.1750, GNorm = 2.4982, lr_0 = 1.4163e-04
Loss = 8.3650e-01, PNorm = 38.1766, GNorm = 3.1130, lr_0 = 1.4537e-04
Loss = 8.3834e-01, PNorm = 38.1795, GNorm = 1.8954, lr_0 = 1.4913e-04
Loss = 8.6850e-01, PNorm = 38.1812, GNorm = 3.0711, lr_0 = 1.5288e-04
Loss = 9.1303e-01, PNorm = 38.1826, GNorm = 2.5238, lr_0 = 1.5662e-04
Loss = 7.7116e-01, PNorm = 38.1852, GNorm = 1.7302, lr_0 = 1.6038e-04
Loss = 9.4312e-01, PNorm = 38.1877, GNorm = 3.2843, lr_0 = 1.6412e-04
Loss = 6.9830e-01, PNorm = 38.1905, GNorm = 5.9668, lr_0 = 1.6788e-04
Loss = 7.6032e-01, PNorm = 38.1939, GNorm = 2.3042, lr_0 = 1.7163e-04
Loss = 8.5313e-01, PNorm = 38.1976, GNorm = 5.0038, lr_0 = 1.7538e-04
Loss = 7.1288e-01, PNorm = 38.1999, GNorm = 5.4210, lr_0 = 1.7913e-04
Loss = 8.2703e-01, PNorm = 38.2025, GNorm = 2.4365, lr_0 = 1.8288e-04
Loss = 8.3104e-01, PNorm = 38.2059, GNorm = 9.6647, lr_0 = 1.8662e-04
Loss = 7.7296e-01, PNorm = 38.2082, GNorm = 1.6945, lr_0 = 1.9038e-04
Loss = 7.9199e-01, PNorm = 38.2108, GNorm = 3.8448, lr_0 = 1.9413e-04
Loss = 8.0874e-01, PNorm = 38.2139, GNorm = 2.8538, lr_0 = 1.9788e-04
Loss = 6.8654e-01, PNorm = 38.2173, GNorm = 7.9047, lr_0 = 2.0163e-04
Loss = 8.2089e-01, PNorm = 38.2188, GNorm = 4.1520, lr_0 = 2.0537e-04
Loss = 7.8797e-01, PNorm = 38.2196, GNorm = 2.2823, lr_0 = 2.0913e-04
Loss = 7.4161e-01, PNorm = 38.2231, GNorm = 3.5300, lr_0 = 2.1288e-04
Loss = 7.8243e-01, PNorm = 38.2272, GNorm = 2.1052, lr_0 = 2.1663e-04
Loss = 7.1479e-01, PNorm = 38.2307, GNorm = 2.3885, lr_0 = 2.2038e-04
Loss = 7.3711e-01, PNorm = 38.2329, GNorm = 1.6143, lr_0 = 2.2412e-04
Loss = 7.3492e-01, PNorm = 38.2357, GNorm = 3.2624, lr_0 = 2.2787e-04
Loss = 7.1587e-01, PNorm = 38.2386, GNorm = 2.2858, lr_0 = 2.3163e-04
Loss = 6.9903e-01, PNorm = 38.2403, GNorm = 1.6149, lr_0 = 2.3538e-04
Loss = 7.6161e-01, PNorm = 38.2436, GNorm = 4.4252, lr_0 = 2.3913e-04
Loss = 6.8435e-01, PNorm = 38.2461, GNorm = 6.6516, lr_0 = 2.4288e-04
Loss = 7.0548e-01, PNorm = 38.2491, GNorm = 3.2418, lr_0 = 2.4662e-04
Loss = 7.7141e-01, PNorm = 38.2534, GNorm = 2.8834, lr_0 = 2.5038e-04
Loss = 7.8732e-01, PNorm = 38.2595, GNorm = 2.1587, lr_0 = 2.5413e-04
Loss = 6.8572e-01, PNorm = 38.2646, GNorm = 1.7404, lr_0 = 2.5788e-04
Loss = 6.1695e-01, PNorm = 38.2672, GNorm = 3.0087, lr_0 = 2.6163e-04
Loss = 7.0816e-01, PNorm = 38.2703, GNorm = 5.1305, lr_0 = 2.6537e-04
Loss = 7.4126e-01, PNorm = 38.2729, GNorm = 8.5519, lr_0 = 2.6912e-04
Loss = 7.4332e-01, PNorm = 38.2757, GNorm = 4.5720, lr_0 = 2.7288e-04
Loss = 7.5776e-01, PNorm = 38.2810, GNorm = 2.6625, lr_0 = 2.7663e-04
Loss = 6.6379e-01, PNorm = 38.2861, GNorm = 2.1995, lr_0 = 2.8038e-04
Loss = 7.3914e-01, PNorm = 38.2904, GNorm = 3.4958, lr_0 = 2.8413e-04
Loss = 7.5513e-01, PNorm = 38.2954, GNorm = 2.8961, lr_0 = 2.8787e-04
Loss = 8.2735e-01, PNorm = 38.2991, GNorm = 2.0079, lr_0 = 2.9163e-04
Loss = 7.6309e-01, PNorm = 38.3033, GNorm = 4.2077, lr_0 = 2.9538e-04
Loss = 6.7827e-01, PNorm = 38.3083, GNorm = 2.7066, lr_0 = 2.9913e-04
Loss = 7.1965e-01, PNorm = 38.3112, GNorm = 5.0958, lr_0 = 3.0288e-04
Loss = 6.4183e-01, PNorm = 38.3141, GNorm = 1.8144, lr_0 = 3.0662e-04
Loss = 6.2546e-01, PNorm = 38.3193, GNorm = 3.5306, lr_0 = 3.1037e-04
Loss = 7.6195e-01, PNorm = 38.3242, GNorm = 1.6660, lr_0 = 3.1413e-04
Loss = 7.2301e-01, PNorm = 38.3280, GNorm = 3.6549, lr_0 = 3.1788e-04
Loss = 6.5922e-01, PNorm = 38.3321, GNorm = 2.3790, lr_0 = 3.2163e-04
Loss = 7.3258e-01, PNorm = 38.3344, GNorm = 1.5735, lr_0 = 3.2538e-04
Loss = 5.7545e-01, PNorm = 38.3396, GNorm = 3.3237, lr_0 = 3.2912e-04
Loss = 6.8155e-01, PNorm = 38.3445, GNorm = 2.1958, lr_0 = 3.3288e-04
Loss = 7.3225e-01, PNorm = 38.3503, GNorm = 4.4461, lr_0 = 3.3663e-04
Loss = 7.0162e-01, PNorm = 38.3525, GNorm = 2.6844, lr_0 = 3.4038e-04
Loss = 6.8599e-01, PNorm = 38.3604, GNorm = 6.7680, lr_0 = 3.4413e-04
Loss = 6.3241e-01, PNorm = 38.3674, GNorm = 7.7489, lr_0 = 3.4787e-04
Loss = 6.1117e-01, PNorm = 38.3706, GNorm = 2.5172, lr_0 = 3.5162e-04
Loss = 7.1860e-01, PNorm = 38.3772, GNorm = 5.6326, lr_0 = 3.5538e-04
Loss = 6.7671e-01, PNorm = 38.3832, GNorm = 10.7375, lr_0 = 3.5913e-04
Loss = 7.2411e-01, PNorm = 38.3868, GNorm = 1.2475, lr_0 = 3.6288e-04
Loss = 6.5908e-01, PNorm = 38.3937, GNorm = 1.2862, lr_0 = 3.6662e-04
Loss = 6.3285e-01, PNorm = 38.3985, GNorm = 1.6513, lr_0 = 3.7037e-04
Loss = 7.0736e-01, PNorm = 38.4058, GNorm = 1.4404, lr_0 = 3.7413e-04
Loss = 6.7713e-01, PNorm = 38.4138, GNorm = 1.8680, lr_0 = 3.7788e-04
Loss = 7.6303e-01, PNorm = 38.4166, GNorm = 2.3788, lr_0 = 3.8163e-04
Loss = 6.7993e-01, PNorm = 38.4241, GNorm = 7.2382, lr_0 = 3.8537e-04
Loss = 7.8139e-01, PNorm = 38.4271, GNorm = 2.3637, lr_0 = 3.8912e-04
Loss = 5.8511e-01, PNorm = 38.4348, GNorm = 3.7211, lr_0 = 3.9287e-04
Loss = 6.8870e-01, PNorm = 38.4391, GNorm = 1.8217, lr_0 = 3.9663e-04
Loss = 6.8395e-01, PNorm = 38.4475, GNorm = 1.0464, lr_0 = 4.0038e-04
Loss = 6.6819e-01, PNorm = 38.4547, GNorm = 1.4719, lr_0 = 4.0413e-04
Loss = 7.7910e-01, PNorm = 38.4571, GNorm = 3.1596, lr_0 = 4.0787e-04
Loss = 6.9373e-01, PNorm = 38.4637, GNorm = 4.0964, lr_0 = 4.1162e-04
Loss = 7.5040e-01, PNorm = 38.4691, GNorm = 4.4842, lr_0 = 4.1537e-04
Loss = 7.1801e-01, PNorm = 38.4752, GNorm = 1.8624, lr_0 = 4.1913e-04
Loss = 6.5174e-01, PNorm = 38.4798, GNorm = 4.0624, lr_0 = 4.2288e-04
Loss = 5.9472e-01, PNorm = 38.4855, GNorm = 3.2009, lr_0 = 4.2662e-04
Loss = 6.0552e-01, PNorm = 38.4936, GNorm = 2.8914, lr_0 = 4.3037e-04
Loss = 6.9046e-01, PNorm = 38.5011, GNorm = 1.8704, lr_0 = 4.3412e-04
Loss = 6.0530e-01, PNorm = 38.5064, GNorm = 1.2857, lr_0 = 4.3788e-04
Loss = 6.3689e-01, PNorm = 38.5112, GNorm = 1.5966, lr_0 = 4.4163e-04
Loss = 6.8952e-01, PNorm = 38.5194, GNorm = 3.2649, lr_0 = 4.4538e-04
Loss = 6.7945e-01, PNorm = 38.5218, GNorm = 1.4495, lr_0 = 4.4912e-04
Loss = 6.2109e-01, PNorm = 38.5266, GNorm = 1.6144, lr_0 = 4.5287e-04
Loss = 6.4625e-01, PNorm = 38.5375, GNorm = 3.5590, lr_0 = 4.5662e-04
Loss = 6.0796e-01, PNorm = 38.5484, GNorm = 1.5100, lr_0 = 4.6038e-04
Loss = 5.8622e-01, PNorm = 38.5570, GNorm = 3.4617, lr_0 = 4.6413e-04
Loss = 6.9491e-01, PNorm = 38.5610, GNorm = 1.1633, lr_0 = 4.6787e-04
Loss = 5.3628e-01, PNorm = 38.5685, GNorm = 2.0304, lr_0 = 4.7162e-04
Loss = 5.7769e-01, PNorm = 38.5751, GNorm = 1.1894, lr_0 = 4.7537e-04
Loss = 6.4623e-01, PNorm = 38.5822, GNorm = 1.1648, lr_0 = 4.7913e-04
Loss = 6.5867e-01, PNorm = 38.5875, GNorm = 3.7534, lr_0 = 4.8288e-04
Loss = 5.7281e-01, PNorm = 38.5929, GNorm = 1.3942, lr_0 = 4.8663e-04
Loss = 6.2739e-01, PNorm = 38.6024, GNorm = 4.0449, lr_0 = 4.9038e-04
Loss = 6.9514e-01, PNorm = 38.6118, GNorm = 2.3730, lr_0 = 4.9412e-04
Loss = 6.3850e-01, PNorm = 38.6230, GNorm = 4.3295, lr_0 = 4.9788e-04
Loss = 6.5761e-01, PNorm = 38.6295, GNorm = 1.2505, lr_0 = 5.0163e-04
Loss = 6.1192e-01, PNorm = 38.6397, GNorm = 1.4432, lr_0 = 5.0538e-04
Loss = 6.0613e-01, PNorm = 38.6449, GNorm = 1.7201, lr_0 = 5.0913e-04
Loss = 6.6217e-01, PNorm = 38.6500, GNorm = 1.2998, lr_0 = 5.1287e-04
Loss = 6.6261e-01, PNorm = 38.6593, GNorm = 1.7560, lr_0 = 5.1663e-04
Loss = 5.8803e-01, PNorm = 38.6700, GNorm = 1.9864, lr_0 = 5.2038e-04
Loss = 6.0779e-01, PNorm = 38.6770, GNorm = 1.6358, lr_0 = 5.2413e-04
Loss = 6.5820e-01, PNorm = 38.6859, GNorm = 1.4873, lr_0 = 5.2788e-04
Loss = 6.7948e-01, PNorm = 38.6972, GNorm = 3.6324, lr_0 = 5.3162e-04
Loss = 6.6211e-01, PNorm = 38.7080, GNorm = 4.2593, lr_0 = 5.3538e-04
Loss = 6.8562e-01, PNorm = 38.7153, GNorm = 2.9705, lr_0 = 5.3912e-04
Loss = 6.0995e-01, PNorm = 38.7244, GNorm = 5.0935, lr_0 = 5.4288e-04
Loss = 6.3107e-01, PNorm = 38.7328, GNorm = 6.0504, lr_0 = 5.4663e-04
Loss = 6.4178e-01, PNorm = 38.7425, GNorm = 2.7320, lr_0 = 5.5038e-04
Validation mae = 0.146195
Epoch 1
Loss = 6.3966e-01, PNorm = 38.7567, GNorm = 2.3556, lr_0 = 5.5413e-04
Loss = 6.0460e-01, PNorm = 38.7704, GNorm = 2.0167, lr_0 = 5.5787e-04
Loss = 6.1250e-01, PNorm = 38.7824, GNorm = 1.7075, lr_0 = 5.6163e-04
Loss = 6.3155e-01, PNorm = 38.7924, GNorm = 1.4551, lr_0 = 5.6538e-04
Loss = 5.8976e-01, PNorm = 38.8059, GNorm = 6.8216, lr_0 = 5.6913e-04
Loss = 6.9333e-01, PNorm = 38.8159, GNorm = 2.3569, lr_0 = 5.7288e-04
Loss = 6.5319e-01, PNorm = 38.8262, GNorm = 1.8510, lr_0 = 5.7662e-04
Loss = 6.0778e-01, PNorm = 38.8415, GNorm = 3.2593, lr_0 = 5.8038e-04
Loss = 6.0989e-01, PNorm = 38.8560, GNorm = 2.8774, lr_0 = 5.8413e-04
Loss = 5.7443e-01, PNorm = 38.8696, GNorm = 2.0492, lr_0 = 5.8788e-04
Loss = 5.6656e-01, PNorm = 38.8809, GNorm = 3.4607, lr_0 = 5.9163e-04
Loss = 5.9404e-01, PNorm = 38.8902, GNorm = 3.1727, lr_0 = 5.9538e-04
Loss = 6.5866e-01, PNorm = 38.9034, GNorm = 3.3104, lr_0 = 5.9913e-04
Loss = 6.1553e-01, PNorm = 38.9145, GNorm = 2.0742, lr_0 = 6.0288e-04
Loss = 5.3028e-01, PNorm = 38.9237, GNorm = 1.5169, lr_0 = 6.0663e-04
Loss = 6.4989e-01, PNorm = 38.9358, GNorm = 1.3512, lr_0 = 6.1038e-04
Loss = 6.5176e-01, PNorm = 38.9453, GNorm = 5.5384, lr_0 = 6.1413e-04
Loss = 6.7981e-01, PNorm = 38.9566, GNorm = 5.4156, lr_0 = 6.1788e-04
Loss = 6.6711e-01, PNorm = 38.9683, GNorm = 1.3763, lr_0 = 6.2163e-04
Loss = 6.8931e-01, PNorm = 38.9857, GNorm = 2.6735, lr_0 = 6.2538e-04
Loss = 5.4363e-01, PNorm = 38.9975, GNorm = 1.8071, lr_0 = 6.2913e-04
Loss = 6.6638e-01, PNorm = 39.0121, GNorm = 1.8668, lr_0 = 6.3288e-04
Loss = 6.0054e-01, PNorm = 39.0248, GNorm = 5.6874, lr_0 = 6.3663e-04
Loss = 6.8949e-01, PNorm = 39.0317, GNorm = 3.7016, lr_0 = 6.4038e-04
Loss = 6.3562e-01, PNorm = 39.0425, GNorm = 3.8226, lr_0 = 6.4413e-04
Loss = 5.9096e-01, PNorm = 39.0537, GNorm = 6.5628, lr_0 = 6.4788e-04
Loss = 5.6174e-01, PNorm = 39.0618, GNorm = 1.4789, lr_0 = 6.5163e-04
Loss = 5.7818e-01, PNorm = 39.0723, GNorm = 3.5217, lr_0 = 6.5538e-04
Loss = 5.4412e-01, PNorm = 39.0871, GNorm = 1.6582, lr_0 = 6.5913e-04
Loss = 5.1881e-01, PNorm = 39.1015, GNorm = 1.2383, lr_0 = 6.6288e-04
Loss = 6.3863e-01, PNorm = 39.1102, GNorm = 1.6040, lr_0 = 6.6663e-04
Loss = 6.1840e-01, PNorm = 39.1172, GNorm = 5.6968, lr_0 = 6.7038e-04
Loss = 4.8983e-01, PNorm = 39.1325, GNorm = 3.6360, lr_0 = 6.7413e-04
Loss = 6.2614e-01, PNorm = 39.1446, GNorm = 1.6007, lr_0 = 6.7788e-04
Loss = 5.4775e-01, PNorm = 39.1540, GNorm = 3.1919, lr_0 = 6.8163e-04
Loss = 6.5059e-01, PNorm = 39.1697, GNorm = 1.4541, lr_0 = 6.8538e-04
Loss = 5.5014e-01, PNorm = 39.1855, GNorm = 1.6761, lr_0 = 6.8913e-04
Loss = 5.8691e-01, PNorm = 39.1930, GNorm = 1.8114, lr_0 = 6.9288e-04
Loss = 5.9017e-01, PNorm = 39.2018, GNorm = 1.4690, lr_0 = 6.9663e-04
Loss = 5.5927e-01, PNorm = 39.2131, GNorm = 2.5795, lr_0 = 7.0038e-04
Loss = 6.3791e-01, PNorm = 39.2295, GNorm = 3.4458, lr_0 = 7.0413e-04
Loss = 5.9255e-01, PNorm = 39.2442, GNorm = 4.6350, lr_0 = 7.0788e-04
Loss = 4.9130e-01, PNorm = 39.2596, GNorm = 2.5379, lr_0 = 7.1163e-04
Loss = 5.9527e-01, PNorm = 39.2708, GNorm = 1.5750, lr_0 = 7.1538e-04
Loss = 5.0494e-01, PNorm = 39.2827, GNorm = 4.4466, lr_0 = 7.1913e-04
Loss = 5.8446e-01, PNorm = 39.2986, GNorm = 3.2408, lr_0 = 7.2288e-04
Loss = 5.6087e-01, PNorm = 39.3101, GNorm = 1.1645, lr_0 = 7.2663e-04
Loss = 6.4008e-01, PNorm = 39.3255, GNorm = 3.4655, lr_0 = 7.3038e-04
Loss = 5.9663e-01, PNorm = 39.3432, GNorm = 2.7020, lr_0 = 7.3413e-04
Loss = 5.4992e-01, PNorm = 39.3611, GNorm = 0.9552, lr_0 = 7.3788e-04
Loss = 6.0510e-01, PNorm = 39.3735, GNorm = 1.2365, lr_0 = 7.4163e-04
Loss = 6.8831e-01, PNorm = 39.3923, GNorm = 2.3503, lr_0 = 7.4538e-04
Loss = 6.0821e-01, PNorm = 39.4105, GNorm = 5.7249, lr_0 = 7.4913e-04
Loss = 6.0623e-01, PNorm = 39.4195, GNorm = 1.6966, lr_0 = 7.5288e-04
Loss = 6.5329e-01, PNorm = 39.4333, GNorm = 5.7088, lr_0 = 7.5663e-04
Loss = 6.0799e-01, PNorm = 39.4513, GNorm = 6.2053, lr_0 = 7.6038e-04
Loss = 6.4013e-01, PNorm = 39.4654, GNorm = 2.1537, lr_0 = 7.6413e-04
Loss = 6.0697e-01, PNorm = 39.4811, GNorm = 1.3428, lr_0 = 7.6788e-04
Loss = 5.3160e-01, PNorm = 39.4942, GNorm = 2.0148, lr_0 = 7.7163e-04
Loss = 6.0870e-01, PNorm = 39.5032, GNorm = 1.6802, lr_0 = 7.7538e-04
Loss = 6.4164e-01, PNorm = 39.5192, GNorm = 4.1008, lr_0 = 7.7913e-04
Loss = 5.8435e-01, PNorm = 39.5343, GNorm = 3.9538, lr_0 = 7.8288e-04
Loss = 6.1603e-01, PNorm = 39.5561, GNorm = 3.7668, lr_0 = 7.8663e-04
Loss = 5.9846e-01, PNorm = 39.5786, GNorm = 1.0141, lr_0 = 7.9038e-04
Loss = 5.1348e-01, PNorm = 39.5906, GNorm = 1.6115, lr_0 = 7.9413e-04
Loss = 5.9864e-01, PNorm = 39.6101, GNorm = 1.1304, lr_0 = 7.9788e-04
Loss = 6.5024e-01, PNorm = 39.6222, GNorm = 7.7898, lr_0 = 8.0163e-04
Loss = 5.9986e-01, PNorm = 39.6319, GNorm = 4.8038, lr_0 = 8.0538e-04
Loss = 6.5237e-01, PNorm = 39.6510, GNorm = 1.4226, lr_0 = 8.0913e-04
Loss = 5.5408e-01, PNorm = 39.6642, GNorm = 4.2794, lr_0 = 8.1288e-04
Loss = 5.8882e-01, PNorm = 39.6764, GNorm = 2.6958, lr_0 = 8.1663e-04
Loss = 6.3580e-01, PNorm = 39.6920, GNorm = 3.0836, lr_0 = 8.2038e-04
Loss = 6.2451e-01, PNorm = 39.7089, GNorm = 2.5356, lr_0 = 8.2413e-04
Loss = 5.0632e-01, PNorm = 39.7214, GNorm = 1.2359, lr_0 = 8.2788e-04
Loss = 5.5667e-01, PNorm = 39.7355, GNorm = 3.3065, lr_0 = 8.3163e-04
Loss = 5.8443e-01, PNorm = 39.7494, GNorm = 2.8894, lr_0 = 8.3538e-04
Loss = 6.5514e-01, PNorm = 39.7689, GNorm = 1.3144, lr_0 = 8.3913e-04
Loss = 5.9303e-01, PNorm = 39.7835, GNorm = 4.2640, lr_0 = 8.4288e-04
Loss = 5.9381e-01, PNorm = 39.8022, GNorm = 2.7239, lr_0 = 8.4663e-04
Loss = 5.8629e-01, PNorm = 39.8258, GNorm = 3.8627, lr_0 = 8.5038e-04
Loss = 6.1257e-01, PNorm = 39.8356, GNorm = 3.8114, lr_0 = 8.5413e-04
Loss = 5.8283e-01, PNorm = 39.8524, GNorm = 2.6884, lr_0 = 8.5788e-04
Loss = 6.9246e-01, PNorm = 39.8718, GNorm = 2.2682, lr_0 = 8.6163e-04
Loss = 6.4171e-01, PNorm = 39.8907, GNorm = 2.6047, lr_0 = 8.6538e-04
Loss = 6.6116e-01, PNorm = 39.9184, GNorm = 2.4803, lr_0 = 8.6913e-04
Loss = 6.4475e-01, PNorm = 39.9382, GNorm = 3.9270, lr_0 = 8.7288e-04
Loss = 6.3297e-01, PNorm = 39.9531, GNorm = 1.7209, lr_0 = 8.7663e-04
Loss = 5.8993e-01, PNorm = 39.9727, GNorm = 1.8213, lr_0 = 8.8038e-04
Loss = 6.2700e-01, PNorm = 39.9964, GNorm = 3.6948, lr_0 = 8.8413e-04
Loss = 6.0255e-01, PNorm = 40.0181, GNorm = 1.9695, lr_0 = 8.8788e-04
Loss = 5.2811e-01, PNorm = 40.0395, GNorm = 2.4060, lr_0 = 8.9163e-04
Loss = 5.9998e-01, PNorm = 40.0539, GNorm = 1.6728, lr_0 = 8.9538e-04
Loss = 5.8156e-01, PNorm = 40.0765, GNorm = 0.9503, lr_0 = 8.9913e-04
Loss = 5.6508e-01, PNorm = 40.0871, GNorm = 2.8971, lr_0 = 9.0288e-04
Loss = 5.1770e-01, PNorm = 40.1057, GNorm = 0.7238, lr_0 = 9.0663e-04
Loss = 5.8301e-01, PNorm = 40.1232, GNorm = 2.6530, lr_0 = 9.1038e-04
Loss = 6.3780e-01, PNorm = 40.1460, GNorm = 2.6438, lr_0 = 9.1413e-04
Loss = 5.8999e-01, PNorm = 40.1729, GNorm = 0.9782, lr_0 = 9.1788e-04
Loss = 5.8468e-01, PNorm = 40.1867, GNorm = 0.8337, lr_0 = 9.2163e-04
Loss = 5.3999e-01, PNorm = 40.2068, GNorm = 2.0357, lr_0 = 9.2538e-04
Loss = 5.3977e-01, PNorm = 40.2279, GNorm = 3.7377, lr_0 = 9.2913e-04
Loss = 5.5292e-01, PNorm = 40.2372, GNorm = 1.5694, lr_0 = 9.3288e-04
Loss = 5.7995e-01, PNorm = 40.2457, GNorm = 1.2042, lr_0 = 9.3663e-04
Loss = 4.9683e-01, PNorm = 40.2553, GNorm = 1.0215, lr_0 = 9.4038e-04
Loss = 5.8325e-01, PNorm = 40.2734, GNorm = 4.1209, lr_0 = 9.4413e-04
Loss = 5.7533e-01, PNorm = 40.3049, GNorm = 2.4686, lr_0 = 9.4788e-04
Loss = 5.8944e-01, PNorm = 40.3296, GNorm = 2.7935, lr_0 = 9.5163e-04
Loss = 5.8269e-01, PNorm = 40.3470, GNorm = 1.2176, lr_0 = 9.5538e-04
Loss = 5.5476e-01, PNorm = 40.3761, GNorm = 2.9063, lr_0 = 9.5913e-04
Loss = 5.4816e-01, PNorm = 40.3902, GNorm = 1.8637, lr_0 = 9.6288e-04
Loss = 5.2476e-01, PNorm = 40.4134, GNorm = 0.9118, lr_0 = 9.6663e-04
Loss = 4.6172e-01, PNorm = 40.4302, GNorm = 1.9555, lr_0 = 9.7038e-04
Loss = 5.9970e-01, PNorm = 40.4519, GNorm = 0.8939, lr_0 = 9.7413e-04
Loss = 5.1338e-01, PNorm = 40.4680, GNorm = 2.3535, lr_0 = 9.7788e-04
Loss = 5.0597e-01, PNorm = 40.4843, GNorm = 2.9953, lr_0 = 9.8163e-04
Loss = 6.2062e-01, PNorm = 40.4991, GNorm = 2.5742, lr_0 = 9.8537e-04
Loss = 6.1842e-01, PNorm = 40.5235, GNorm = 1.5624, lr_0 = 9.8912e-04
Loss = 4.9103e-01, PNorm = 40.5471, GNorm = 1.3598, lr_0 = 9.9288e-04
Loss = 5.1564e-01, PNorm = 40.5726, GNorm = 1.0959, lr_0 = 9.9663e-04
Loss = 5.8564e-01, PNorm = 40.5898, GNorm = 2.5111, lr_0 = 9.9993e-04
Validation mae = 0.131927
Epoch 2
Loss = 4.8698e-01, PNorm = 40.6093, GNorm = 1.1498, lr_0 = 9.9925e-04
Loss = 5.1554e-01, PNorm = 40.6310, GNorm = 1.9376, lr_0 = 9.9856e-04
Loss = 5.8569e-01, PNorm = 40.6491, GNorm = 1.7937, lr_0 = 9.9788e-04
Loss = 6.1619e-01, PNorm = 40.6664, GNorm = 1.8820, lr_0 = 9.9719e-04
Loss = 5.7379e-01, PNorm = 40.6835, GNorm = 1.5487, lr_0 = 9.9651e-04
Loss = 5.8066e-01, PNorm = 40.7003, GNorm = 1.6261, lr_0 = 9.9583e-04
Loss = 5.4888e-01, PNorm = 40.7262, GNorm = 1.8197, lr_0 = 9.9515e-04
Loss = 4.8120e-01, PNorm = 40.7530, GNorm = 2.8561, lr_0 = 9.9446e-04
Loss = 6.1107e-01, PNorm = 40.7712, GNorm = 0.7995, lr_0 = 9.9378e-04
Loss = 6.6035e-01, PNorm = 40.8044, GNorm = 2.2870, lr_0 = 9.9310e-04
Loss = 5.7692e-01, PNorm = 40.8377, GNorm = 4.9309, lr_0 = 9.9242e-04
Loss = 5.5450e-01, PNorm = 40.8558, GNorm = 1.2762, lr_0 = 9.9174e-04
Loss = 6.3529e-01, PNorm = 40.8721, GNorm = 2.2719, lr_0 = 9.9106e-04
Loss = 5.6391e-01, PNorm = 40.8971, GNorm = 1.3614, lr_0 = 9.9038e-04
Loss = 5.1661e-01, PNorm = 40.9199, GNorm = 0.9799, lr_0 = 9.8971e-04
Loss = 5.3923e-01, PNorm = 40.9417, GNorm = 1.1891, lr_0 = 9.8903e-04
Loss = 5.6777e-01, PNorm = 40.9620, GNorm = 3.9804, lr_0 = 9.8835e-04
Loss = 5.1712e-01, PNorm = 40.9891, GNorm = 2.2113, lr_0 = 9.8767e-04
Loss = 5.3648e-01, PNorm = 41.0138, GNorm = 1.4362, lr_0 = 9.8700e-04
Loss = 5.2602e-01, PNorm = 41.0397, GNorm = 2.4545, lr_0 = 9.8632e-04
Loss = 5.6275e-01, PNorm = 41.0490, GNorm = 1.9999, lr_0 = 9.8564e-04
Loss = 5.3553e-01, PNorm = 41.0682, GNorm = 1.9207, lr_0 = 9.8497e-04
Loss = 5.4926e-01, PNorm = 41.0927, GNorm = 2.7427, lr_0 = 9.8429e-04
Loss = 5.6703e-01, PNorm = 41.1223, GNorm = 1.9653, lr_0 = 9.8362e-04
Loss = 5.5635e-01, PNorm = 41.1328, GNorm = 3.2211, lr_0 = 9.8295e-04
Loss = 6.0015e-01, PNorm = 41.1495, GNorm = 3.6456, lr_0 = 9.8227e-04
Loss = 6.0880e-01, PNorm = 41.1756, GNorm = 0.8963, lr_0 = 9.8160e-04
Loss = 5.1017e-01, PNorm = 41.1985, GNorm = 1.9532, lr_0 = 9.8093e-04
Loss = 5.2200e-01, PNorm = 41.2261, GNorm = 1.4990, lr_0 = 9.8026e-04
Loss = 5.9149e-01, PNorm = 41.2463, GNorm = 1.3736, lr_0 = 9.7958e-04
Loss = 5.0793e-01, PNorm = 41.2632, GNorm = 3.2930, lr_0 = 9.7891e-04
Loss = 5.2912e-01, PNorm = 41.2783, GNorm = 1.0032, lr_0 = 9.7824e-04
Loss = 5.1390e-01, PNorm = 41.2988, GNorm = 3.7863, lr_0 = 9.7757e-04
Loss = 5.3365e-01, PNorm = 41.3165, GNorm = 1.2299, lr_0 = 9.7690e-04
Loss = 6.0245e-01, PNorm = 41.3408, GNorm = 2.9426, lr_0 = 9.7623e-04
Loss = 5.9809e-01, PNorm = 41.3676, GNorm = 3.2870, lr_0 = 9.7556e-04
Loss = 5.5094e-01, PNorm = 41.3837, GNorm = 0.9547, lr_0 = 9.7490e-04
Loss = 4.9449e-01, PNorm = 41.4110, GNorm = 1.5252, lr_0 = 9.7423e-04
Loss = 5.6151e-01, PNorm = 41.4447, GNorm = 1.2605, lr_0 = 9.7356e-04
Loss = 5.7223e-01, PNorm = 41.4734, GNorm = 1.3682, lr_0 = 9.7289e-04
Loss = 5.4508e-01, PNorm = 41.4933, GNorm = 3.0686, lr_0 = 9.7223e-04
Loss = 5.4494e-01, PNorm = 41.5067, GNorm = 3.7623, lr_0 = 9.7156e-04
Loss = 5.5614e-01, PNorm = 41.5317, GNorm = 1.4236, lr_0 = 9.7090e-04
Loss = 5.3542e-01, PNorm = 41.5637, GNorm = 1.2726, lr_0 = 9.7023e-04
Loss = 6.0128e-01, PNorm = 41.5917, GNorm = 2.4863, lr_0 = 9.6957e-04
Loss = 5.8091e-01, PNorm = 41.6160, GNorm = 1.1447, lr_0 = 9.6890e-04
Loss = 5.0899e-01, PNorm = 41.6352, GNorm = 2.3437, lr_0 = 9.6824e-04
Loss = 4.9363e-01, PNorm = 41.6431, GNorm = 1.4065, lr_0 = 9.6757e-04
Loss = 4.8216e-01, PNorm = 41.6547, GNorm = 1.5084, lr_0 = 9.6691e-04
Loss = 5.4621e-01, PNorm = 41.6742, GNorm = 0.9777, lr_0 = 9.6625e-04
Loss = 5.9382e-01, PNorm = 41.6916, GNorm = 1.2305, lr_0 = 9.6559e-04
Loss = 5.1742e-01, PNorm = 41.7106, GNorm = 1.4271, lr_0 = 9.6493e-04
Loss = 5.0432e-01, PNorm = 41.7285, GNorm = 3.1435, lr_0 = 9.6427e-04
Loss = 4.8517e-01, PNorm = 41.7503, GNorm = 2.5572, lr_0 = 9.6360e-04
Loss = 5.7339e-01, PNorm = 41.7696, GNorm = 4.4289, lr_0 = 9.6294e-04
Loss = 5.3553e-01, PNorm = 41.7912, GNorm = 1.8576, lr_0 = 9.6228e-04
Loss = 5.1751e-01, PNorm = 41.8131, GNorm = 3.3512, lr_0 = 9.6163e-04
Loss = 6.1563e-01, PNorm = 41.8352, GNorm = 5.3127, lr_0 = 9.6097e-04
Loss = 5.1357e-01, PNorm = 41.8559, GNorm = 2.0443, lr_0 = 9.6031e-04
Loss = 5.4771e-01, PNorm = 41.8681, GNorm = 1.5195, lr_0 = 9.5965e-04
Loss = 5.6840e-01, PNorm = 41.8835, GNorm = 1.6681, lr_0 = 9.5899e-04
Loss = 5.7528e-01, PNorm = 41.9009, GNorm = 2.3009, lr_0 = 9.5834e-04
Loss = 5.5681e-01, PNorm = 41.9241, GNorm = 1.1867, lr_0 = 9.5768e-04
Loss = 5.0321e-01, PNorm = 41.9393, GNorm = 0.8984, lr_0 = 9.5702e-04
Loss = 5.0540e-01, PNorm = 41.9558, GNorm = 3.1211, lr_0 = 9.5637e-04
Loss = 5.5351e-01, PNorm = 41.9803, GNorm = 3.8755, lr_0 = 9.5571e-04
Loss = 5.5429e-01, PNorm = 41.9954, GNorm = 2.4711, lr_0 = 9.5506e-04
Loss = 5.9604e-01, PNorm = 42.0184, GNorm = 2.0200, lr_0 = 9.5440e-04
Loss = 5.4615e-01, PNorm = 42.0436, GNorm = 1.6145, lr_0 = 9.5375e-04
Loss = 4.9033e-01, PNorm = 42.0693, GNorm = 0.9614, lr_0 = 9.5310e-04
Loss = 5.9047e-01, PNorm = 42.0803, GNorm = 1.8381, lr_0 = 9.5244e-04
Loss = 6.0367e-01, PNorm = 42.1089, GNorm = 3.7674, lr_0 = 9.5179e-04
Loss = 5.1057e-01, PNorm = 42.1379, GNorm = 1.0859, lr_0 = 9.5114e-04
Loss = 5.4170e-01, PNorm = 42.1609, GNorm = 2.1256, lr_0 = 9.5049e-04
Loss = 5.2957e-01, PNorm = 42.1857, GNorm = 1.6665, lr_0 = 9.4984e-04
Loss = 5.7807e-01, PNorm = 42.2007, GNorm = 1.6837, lr_0 = 9.4919e-04
Loss = 5.3012e-01, PNorm = 42.2155, GNorm = 1.6876, lr_0 = 9.4854e-04
Loss = 5.5463e-01, PNorm = 42.2373, GNorm = 1.4003, lr_0 = 9.4789e-04
Loss = 5.4537e-01, PNorm = 42.2636, GNorm = 2.8430, lr_0 = 9.4724e-04
Loss = 4.8918e-01, PNorm = 42.2946, GNorm = 1.5683, lr_0 = 9.4659e-04
Loss = 5.2446e-01, PNorm = 42.3175, GNorm = 3.0200, lr_0 = 9.4594e-04
Loss = 5.7242e-01, PNorm = 42.3396, GNorm = 1.3358, lr_0 = 9.4529e-04
Loss = 5.9025e-01, PNorm = 42.3604, GNorm = 2.3973, lr_0 = 9.4464e-04
Loss = 5.7899e-01, PNorm = 42.3828, GNorm = 1.5856, lr_0 = 9.4400e-04
Loss = 4.8593e-01, PNorm = 42.3947, GNorm = 0.8674, lr_0 = 9.4335e-04
Loss = 5.3053e-01, PNorm = 42.4151, GNorm = 1.0297, lr_0 = 9.4270e-04
Loss = 5.3981e-01, PNorm = 42.4387, GNorm = 0.8912, lr_0 = 9.4206e-04
Loss = 4.9250e-01, PNorm = 42.4546, GNorm = 2.0739, lr_0 = 9.4141e-04
Loss = 5.3009e-01, PNorm = 42.4673, GNorm = 1.4289, lr_0 = 9.4077e-04
Loss = 5.7030e-01, PNorm = 42.4890, GNorm = 1.4048, lr_0 = 9.4012e-04
Loss = 5.5761e-01, PNorm = 42.5085, GNorm = 1.3156, lr_0 = 9.3948e-04
Loss = 5.2217e-01, PNorm = 42.5263, GNorm = 0.7088, lr_0 = 9.3884e-04
Loss = 5.8088e-01, PNorm = 42.5453, GNorm = 1.8476, lr_0 = 9.3819e-04
Loss = 5.0027e-01, PNorm = 42.5611, GNorm = 2.1795, lr_0 = 9.3755e-04
Loss = 4.9648e-01, PNorm = 42.5858, GNorm = 0.9895, lr_0 = 9.3691e-04
Loss = 6.2117e-01, PNorm = 42.6015, GNorm = 4.1292, lr_0 = 9.3627e-04
Loss = 5.0152e-01, PNorm = 42.6188, GNorm = 2.0857, lr_0 = 9.3562e-04
Loss = 5.8973e-01, PNorm = 42.6516, GNorm = 1.5646, lr_0 = 9.3498e-04
Loss = 5.1468e-01, PNorm = 42.6887, GNorm = 1.8628, lr_0 = 9.3434e-04
Loss = 5.2619e-01, PNorm = 42.7133, GNorm = 0.9898, lr_0 = 9.3370e-04
Loss = 5.3627e-01, PNorm = 42.7315, GNorm = 2.8876, lr_0 = 9.3306e-04
Loss = 6.1388e-01, PNorm = 42.7575, GNorm = 1.0598, lr_0 = 9.3242e-04
Loss = 4.9673e-01, PNorm = 42.7867, GNorm = 3.7404, lr_0 = 9.3178e-04
Loss = 5.0120e-01, PNorm = 42.8037, GNorm = 0.8834, lr_0 = 9.3115e-04
Loss = 4.7639e-01, PNorm = 42.8237, GNorm = 1.5338, lr_0 = 9.3051e-04
Loss = 4.7189e-01, PNorm = 42.8454, GNorm = 1.3132, lr_0 = 9.2987e-04
Loss = 5.0516e-01, PNorm = 42.8667, GNorm = 1.7355, lr_0 = 9.2923e-04
Loss = 5.8013e-01, PNorm = 42.8764, GNorm = 1.2363, lr_0 = 9.2860e-04
Loss = 5.6051e-01, PNorm = 42.8923, GNorm = 2.1384, lr_0 = 9.2796e-04
Loss = 4.9707e-01, PNorm = 42.9204, GNorm = 3.1715, lr_0 = 9.2733e-04
Loss = 5.8868e-01, PNorm = 42.9408, GNorm = 0.8418, lr_0 = 9.2669e-04
Loss = 5.7319e-01, PNorm = 42.9542, GNorm = 1.5969, lr_0 = 9.2606e-04
Loss = 6.3133e-01, PNorm = 42.9881, GNorm = 2.1964, lr_0 = 9.2542e-04
Loss = 4.7691e-01, PNorm = 43.0170, GNorm = 1.1171, lr_0 = 9.2479e-04
Loss = 5.0298e-01, PNorm = 43.0385, GNorm = 1.4301, lr_0 = 9.2415e-04
Loss = 5.2038e-01, PNorm = 43.0503, GNorm = 1.0812, lr_0 = 9.2352e-04
Loss = 5.3170e-01, PNorm = 43.0638, GNorm = 1.2637, lr_0 = 9.2289e-04
Loss = 5.2283e-01, PNorm = 43.0802, GNorm = 1.9044, lr_0 = 9.2226e-04
Loss = 5.1197e-01, PNorm = 43.0966, GNorm = 1.2349, lr_0 = 9.2162e-04
Loss = 5.4523e-01, PNorm = 43.1053, GNorm = 2.2497, lr_0 = 9.2099e-04
Validation mae = 0.127268
Epoch 3
Loss = 4.8410e-01, PNorm = 43.1253, GNorm = 1.1706, lr_0 = 9.2036e-04
Loss = 4.8341e-01, PNorm = 43.1512, GNorm = 3.6330, lr_0 = 9.1973e-04
Loss = 4.4217e-01, PNorm = 43.1737, GNorm = 1.4392, lr_0 = 9.1910e-04
Loss = 5.3468e-01, PNorm = 43.1899, GNorm = 2.0752, lr_0 = 9.1847e-04
Loss = 5.6924e-01, PNorm = 43.2088, GNorm = 1.4408, lr_0 = 9.1784e-04
Loss = 5.4386e-01, PNorm = 43.2289, GNorm = 0.8014, lr_0 = 9.1721e-04
Loss = 4.6454e-01, PNorm = 43.2505, GNorm = 1.0321, lr_0 = 9.1658e-04
Loss = 5.1324e-01, PNorm = 43.2636, GNorm = 3.4669, lr_0 = 9.1596e-04
Loss = 4.5446e-01, PNorm = 43.2850, GNorm = 2.1160, lr_0 = 9.1533e-04
Loss = 4.5275e-01, PNorm = 43.3062, GNorm = 1.4105, lr_0 = 9.1470e-04
Loss = 4.7855e-01, PNorm = 43.3292, GNorm = 1.5645, lr_0 = 9.1408e-04
Loss = 4.6077e-01, PNorm = 43.3500, GNorm = 1.3851, lr_0 = 9.1345e-04
Loss = 5.1551e-01, PNorm = 43.3658, GNorm = 1.9724, lr_0 = 9.1282e-04
Loss = 4.5618e-01, PNorm = 43.3866, GNorm = 1.5300, lr_0 = 9.1220e-04
Loss = 5.4886e-01, PNorm = 43.3957, GNorm = 1.9776, lr_0 = 9.1157e-04
Loss = 5.8636e-01, PNorm = 43.4234, GNorm = 1.1193, lr_0 = 9.1095e-04
Loss = 6.3605e-01, PNorm = 43.4443, GNorm = 2.2500, lr_0 = 9.1032e-04
Loss = 5.2323e-01, PNorm = 43.4643, GNorm = 1.4072, lr_0 = 9.0970e-04
Loss = 4.7242e-01, PNorm = 43.4910, GNorm = 1.2313, lr_0 = 9.0908e-04
Loss = 5.0905e-01, PNorm = 43.5151, GNorm = 1.6361, lr_0 = 9.0846e-04
Loss = 5.3348e-01, PNorm = 43.5314, GNorm = 2.5751, lr_0 = 9.0783e-04
Loss = 5.8382e-01, PNorm = 43.5506, GNorm = 1.4963, lr_0 = 9.0721e-04
Loss = 4.8880e-01, PNorm = 43.5666, GNorm = 1.5941, lr_0 = 9.0659e-04
Loss = 5.2286e-01, PNorm = 43.5776, GNorm = 1.3318, lr_0 = 9.0597e-04
Loss = 5.7881e-01, PNorm = 43.5928, GNorm = 2.7497, lr_0 = 9.0535e-04
Loss = 4.8851e-01, PNorm = 43.6140, GNorm = 2.0913, lr_0 = 9.0473e-04
Loss = 5.3367e-01, PNorm = 43.6378, GNorm = 1.9465, lr_0 = 9.0411e-04
Loss = 5.1324e-01, PNorm = 43.6528, GNorm = 0.8892, lr_0 = 9.0349e-04
Loss = 5.5972e-01, PNorm = 43.6710, GNorm = 2.0399, lr_0 = 9.0287e-04
Loss = 4.9606e-01, PNorm = 43.6887, GNorm = 1.2894, lr_0 = 9.0225e-04
Loss = 5.0000e-01, PNorm = 43.7156, GNorm = 2.2103, lr_0 = 9.0163e-04
Loss = 5.5227e-01, PNorm = 43.7486, GNorm = 2.2416, lr_0 = 9.0102e-04
Loss = 5.8261e-01, PNorm = 43.7762, GNorm = 2.3069, lr_0 = 9.0040e-04
Loss = 6.1361e-01, PNorm = 43.8033, GNorm = 2.0748, lr_0 = 8.9978e-04
Loss = 5.0070e-01, PNorm = 43.8209, GNorm = 0.9529, lr_0 = 8.9916e-04
Loss = 4.7797e-01, PNorm = 43.8476, GNorm = 1.3192, lr_0 = 8.9855e-04
Loss = 4.7061e-01, PNorm = 43.8716, GNorm = 2.0015, lr_0 = 8.9793e-04
Loss = 5.1698e-01, PNorm = 43.8843, GNorm = 1.3653, lr_0 = 8.9732e-04
Loss = 5.1958e-01, PNorm = 43.9010, GNorm = 2.7932, lr_0 = 8.9670e-04
Loss = 4.8593e-01, PNorm = 43.9225, GNorm = 1.0774, lr_0 = 8.9609e-04
Loss = 5.1976e-01, PNorm = 43.9435, GNorm = 1.5321, lr_0 = 8.9548e-04
Loss = 4.9320e-01, PNorm = 43.9553, GNorm = 3.3283, lr_0 = 8.9486e-04
Loss = 5.1844e-01, PNorm = 43.9834, GNorm = 0.9899, lr_0 = 8.9425e-04
Loss = 5.3320e-01, PNorm = 44.0044, GNorm = 1.4290, lr_0 = 8.9364e-04
Loss = 5.5695e-01, PNorm = 44.0282, GNorm = 1.5095, lr_0 = 8.9302e-04
Loss = 4.7584e-01, PNorm = 44.0483, GNorm = 1.0584, lr_0 = 8.9241e-04
Loss = 5.3024e-01, PNorm = 44.0633, GNorm = 1.7527, lr_0 = 8.9180e-04
Loss = 5.0460e-01, PNorm = 44.0783, GNorm = 2.1928, lr_0 = 8.9119e-04
Loss = 5.0223e-01, PNorm = 44.0944, GNorm = 1.0492, lr_0 = 8.9058e-04
Loss = 5.0046e-01, PNorm = 44.1213, GNorm = 1.4203, lr_0 = 8.8997e-04
Loss = 5.2376e-01, PNorm = 44.1421, GNorm = 1.9758, lr_0 = 8.8936e-04
Loss = 5.2369e-01, PNorm = 44.1641, GNorm = 2.1023, lr_0 = 8.8875e-04
Loss = 4.7711e-01, PNorm = 44.1890, GNorm = 1.8675, lr_0 = 8.8814e-04
Loss = 4.7103e-01, PNorm = 44.2069, GNorm = 1.2924, lr_0 = 8.8753e-04
Loss = 4.4622e-01, PNorm = 44.2260, GNorm = 1.3990, lr_0 = 8.8693e-04
Loss = 5.1403e-01, PNorm = 44.2349, GNorm = 1.7375, lr_0 = 8.8632e-04
Loss = 5.5726e-01, PNorm = 44.2604, GNorm = 1.5406, lr_0 = 8.8571e-04
Loss = 4.8265e-01, PNorm = 44.2850, GNorm = 1.0517, lr_0 = 8.8510e-04
Loss = 5.0431e-01, PNorm = 44.3046, GNorm = 1.3658, lr_0 = 8.8450e-04
Loss = 4.6476e-01, PNorm = 44.3151, GNorm = 2.1123, lr_0 = 8.8389e-04
Loss = 4.5796e-01, PNorm = 44.3346, GNorm = 1.4342, lr_0 = 8.8329e-04
Loss = 4.3862e-01, PNorm = 44.3522, GNorm = 1.5643, lr_0 = 8.8268e-04
Loss = 5.3894e-01, PNorm = 44.3716, GNorm = 1.8625, lr_0 = 8.8208e-04
Loss = 5.5573e-01, PNorm = 44.3923, GNorm = 1.2124, lr_0 = 8.8147e-04
Loss = 5.1321e-01, PNorm = 44.4090, GNorm = 2.9706, lr_0 = 8.8087e-04
Loss = 5.2790e-01, PNorm = 44.4293, GNorm = 1.4853, lr_0 = 8.8026e-04
Loss = 4.8262e-01, PNorm = 44.4513, GNorm = 2.4935, lr_0 = 8.7966e-04
Loss = 5.0514e-01, PNorm = 44.4720, GNorm = 0.9071, lr_0 = 8.7906e-04
Loss = 4.9849e-01, PNorm = 44.4980, GNorm = 1.2823, lr_0 = 8.7846e-04
Loss = 4.7441e-01, PNorm = 44.5238, GNorm = 1.6798, lr_0 = 8.7785e-04
Loss = 4.8082e-01, PNorm = 44.5427, GNorm = 2.7564, lr_0 = 8.7725e-04
Loss = 5.4388e-01, PNorm = 44.5576, GNorm = 1.6739, lr_0 = 8.7665e-04
Loss = 4.9488e-01, PNorm = 44.5724, GNorm = 2.6026, lr_0 = 8.7605e-04
Loss = 5.4728e-01, PNorm = 44.5915, GNorm = 1.6554, lr_0 = 8.7545e-04
Loss = 4.6822e-01, PNorm = 44.6166, GNorm = 2.4947, lr_0 = 8.7485e-04
Loss = 4.7242e-01, PNorm = 44.6387, GNorm = 3.6920, lr_0 = 8.7425e-04
Loss = 5.1059e-01, PNorm = 44.6580, GNorm = 1.1019, lr_0 = 8.7365e-04
Loss = 5.3896e-01, PNorm = 44.6732, GNorm = 1.8338, lr_0 = 8.7306e-04
Loss = 5.2768e-01, PNorm = 44.6852, GNorm = 2.4699, lr_0 = 8.7246e-04
Loss = 4.5023e-01, PNorm = 44.7045, GNorm = 1.2454, lr_0 = 8.7186e-04
Loss = 4.7137e-01, PNorm = 44.7311, GNorm = 1.8951, lr_0 = 8.7126e-04
Loss = 5.2287e-01, PNorm = 44.7531, GNorm = 1.3464, lr_0 = 8.7067e-04
Loss = 4.6421e-01, PNorm = 44.7678, GNorm = 2.0146, lr_0 = 8.7007e-04
Loss = 5.0516e-01, PNorm = 44.7864, GNorm = 1.9113, lr_0 = 8.6947e-04
Loss = 5.4958e-01, PNorm = 44.8147, GNorm = 1.8226, lr_0 = 8.6888e-04
Loss = 5.0624e-01, PNorm = 44.8375, GNorm = 1.1901, lr_0 = 8.6828e-04
Loss = 5.3772e-01, PNorm = 44.8532, GNorm = 2.0930, lr_0 = 8.6769e-04
Loss = 5.4666e-01, PNorm = 44.8772, GNorm = 0.8706, lr_0 = 8.6709e-04
Loss = 5.3676e-01, PNorm = 44.8892, GNorm = 2.8174, lr_0 = 8.6650e-04
Loss = 4.9056e-01, PNorm = 44.9071, GNorm = 1.3440, lr_0 = 8.6590e-04
Loss = 5.4908e-01, PNorm = 44.9205, GNorm = 1.1585, lr_0 = 8.6531e-04
Loss = 5.6868e-01, PNorm = 44.9480, GNorm = 2.9106, lr_0 = 8.6472e-04
Loss = 5.0599e-01, PNorm = 44.9694, GNorm = 2.1053, lr_0 = 8.6413e-04
Loss = 4.8709e-01, PNorm = 44.9892, GNorm = 2.3475, lr_0 = 8.6353e-04
Loss = 5.0370e-01, PNorm = 45.0094, GNorm = 1.0341, lr_0 = 8.6294e-04
Loss = 5.8918e-01, PNorm = 45.0303, GNorm = 2.3989, lr_0 = 8.6235e-04
Loss = 4.9910e-01, PNorm = 45.0601, GNorm = 1.8383, lr_0 = 8.6176e-04
Loss = 5.3638e-01, PNorm = 45.0761, GNorm = 2.5046, lr_0 = 8.6117e-04
Loss = 5.0840e-01, PNorm = 45.1021, GNorm = 1.0510, lr_0 = 8.6058e-04
Loss = 5.1429e-01, PNorm = 45.1178, GNorm = 1.8591, lr_0 = 8.5999e-04
Loss = 5.5964e-01, PNorm = 45.1333, GNorm = 2.1285, lr_0 = 8.5940e-04
Loss = 3.9736e-01, PNorm = 45.1552, GNorm = 1.0760, lr_0 = 8.5881e-04
Loss = 6.2314e-01, PNorm = 45.1811, GNorm = 1.7785, lr_0 = 8.5823e-04
Loss = 4.9723e-01, PNorm = 45.2115, GNorm = 2.9916, lr_0 = 8.5764e-04
Loss = 4.7067e-01, PNorm = 45.2298, GNorm = 1.9718, lr_0 = 8.5705e-04
Loss = 5.5408e-01, PNorm = 45.2388, GNorm = 0.9490, lr_0 = 8.5646e-04
Loss = 5.0089e-01, PNorm = 45.2539, GNorm = 1.8329, lr_0 = 8.5588e-04
Loss = 5.1634e-01, PNorm = 45.2683, GNorm = 1.1332, lr_0 = 8.5529e-04
Loss = 4.8681e-01, PNorm = 45.2855, GNorm = 1.5697, lr_0 = 8.5470e-04
Loss = 4.5520e-01, PNorm = 45.2999, GNorm = 2.5898, lr_0 = 8.5412e-04
Loss = 5.1566e-01, PNorm = 45.3178, GNorm = 1.7571, lr_0 = 8.5353e-04
Loss = 4.6951e-01, PNorm = 45.3437, GNorm = 2.4824, lr_0 = 8.5295e-04
Loss = 5.6081e-01, PNorm = 45.3644, GNorm = 3.2791, lr_0 = 8.5236e-04
Loss = 5.0695e-01, PNorm = 45.3848, GNorm = 2.0868, lr_0 = 8.5178e-04
Loss = 4.6682e-01, PNorm = 45.4057, GNorm = 1.1953, lr_0 = 8.5120e-04
Loss = 4.6829e-01, PNorm = 45.4265, GNorm = 1.4092, lr_0 = 8.5061e-04
Loss = 5.6929e-01, PNorm = 45.4523, GNorm = 1.6672, lr_0 = 8.5003e-04
Loss = 4.9641e-01, PNorm = 45.4831, GNorm = 1.2638, lr_0 = 8.4945e-04
Loss = 5.4627e-01, PNorm = 45.4978, GNorm = 0.8678, lr_0 = 8.4887e-04
Loss = 5.2202e-01, PNorm = 45.5171, GNorm = 1.3820, lr_0 = 8.4828e-04
Validation mae = 0.124768
Epoch 4
Loss = 4.9940e-01, PNorm = 45.5354, GNorm = 1.5241, lr_0 = 8.4770e-04
Loss = 4.2603e-01, PNorm = 45.5498, GNorm = 1.3087, lr_0 = 8.4712e-04
Loss = 5.0260e-01, PNorm = 45.5677, GNorm = 0.9517, lr_0 = 8.4654e-04
Loss = 5.1254e-01, PNorm = 45.5879, GNorm = 1.5847, lr_0 = 8.4596e-04
Loss = 4.6496e-01, PNorm = 45.6088, GNorm = 1.2087, lr_0 = 8.4538e-04
Loss = 5.2515e-01, PNorm = 45.6287, GNorm = 1.1503, lr_0 = 8.4480e-04
Loss = 5.1307e-01, PNorm = 45.6525, GNorm = 1.6507, lr_0 = 8.4423e-04
Loss = 5.6387e-01, PNorm = 45.6717, GNorm = 1.3390, lr_0 = 8.4365e-04
Loss = 5.0503e-01, PNorm = 45.6924, GNorm = 1.3586, lr_0 = 8.4307e-04
Loss = 5.5204e-01, PNorm = 45.7142, GNorm = 1.6488, lr_0 = 8.4249e-04
Loss = 4.5907e-01, PNorm = 45.7367, GNorm = 1.5517, lr_0 = 8.4191e-04
Loss = 4.5883e-01, PNorm = 45.7525, GNorm = 1.6350, lr_0 = 8.4134e-04
Loss = 4.4045e-01, PNorm = 45.7709, GNorm = 1.1086, lr_0 = 8.4076e-04
Loss = 4.7062e-01, PNorm = 45.7979, GNorm = 2.5108, lr_0 = 8.4019e-04
Loss = 4.1587e-01, PNorm = 45.8130, GNorm = 1.8565, lr_0 = 8.3961e-04
Loss = 4.4729e-01, PNorm = 45.8211, GNorm = 0.8933, lr_0 = 8.3903e-04
Loss = 4.5797e-01, PNorm = 45.8410, GNorm = 1.1668, lr_0 = 8.3846e-04
Loss = 5.0440e-01, PNorm = 45.8667, GNorm = 2.5486, lr_0 = 8.3789e-04
Loss = 4.9084e-01, PNorm = 45.8894, GNorm = 1.6659, lr_0 = 8.3731e-04
Loss = 5.7040e-01, PNorm = 45.9215, GNorm = 1.4615, lr_0 = 8.3674e-04
Loss = 4.8993e-01, PNorm = 45.9390, GNorm = 1.4860, lr_0 = 8.3616e-04
Loss = 5.0024e-01, PNorm = 45.9514, GNorm = 2.0153, lr_0 = 8.3559e-04
Loss = 4.7048e-01, PNorm = 45.9657, GNorm = 1.5619, lr_0 = 8.3502e-04
Loss = 5.6116e-01, PNorm = 45.9866, GNorm = 1.4581, lr_0 = 8.3445e-04
Loss = 4.6626e-01, PNorm = 46.0032, GNorm = 1.0848, lr_0 = 8.3388e-04
Loss = 5.1618e-01, PNorm = 46.0291, GNorm = 3.3691, lr_0 = 8.3330e-04
Loss = 4.9466e-01, PNorm = 46.0520, GNorm = 1.2243, lr_0 = 8.3273e-04
Loss = 4.9600e-01, PNorm = 46.0762, GNorm = 1.4149, lr_0 = 8.3216e-04
Loss = 4.9849e-01, PNorm = 46.0967, GNorm = 1.5342, lr_0 = 8.3159e-04
Loss = 5.0464e-01, PNorm = 46.1175, GNorm = 0.9799, lr_0 = 8.3102e-04
Loss = 4.8151e-01, PNorm = 46.1377, GNorm = 1.5651, lr_0 = 8.3045e-04
Loss = 5.0301e-01, PNorm = 46.1595, GNorm = 1.8840, lr_0 = 8.2988e-04
Loss = 4.3990e-01, PNorm = 46.1745, GNorm = 1.9344, lr_0 = 8.2932e-04
Loss = 4.4444e-01, PNorm = 46.1982, GNorm = 1.6387, lr_0 = 8.2875e-04
Loss = 5.4316e-01, PNorm = 46.2261, GNorm = 2.0241, lr_0 = 8.2818e-04
Loss = 4.0651e-01, PNorm = 46.2552, GNorm = 1.5267, lr_0 = 8.2761e-04
Loss = 4.9673e-01, PNorm = 46.2739, GNorm = 1.5212, lr_0 = 8.2705e-04
Loss = 4.7684e-01, PNorm = 46.2817, GNorm = 3.0648, lr_0 = 8.2648e-04
Loss = 4.9515e-01, PNorm = 46.3081, GNorm = 1.0910, lr_0 = 8.2591e-04
Loss = 4.4366e-01, PNorm = 46.3246, GNorm = 1.4225, lr_0 = 8.2535e-04
Loss = 4.7597e-01, PNorm = 46.3396, GNorm = 1.1076, lr_0 = 8.2478e-04
Loss = 5.2503e-01, PNorm = 46.3579, GNorm = 1.4909, lr_0 = 8.2422e-04
Loss = 4.7921e-01, PNorm = 46.3826, GNorm = 1.6049, lr_0 = 8.2365e-04
Loss = 4.1030e-01, PNorm = 46.3988, GNorm = 1.3314, lr_0 = 8.2309e-04
Loss = 5.0518e-01, PNorm = 46.4158, GNorm = 1.4624, lr_0 = 8.2252e-04
Loss = 5.0429e-01, PNorm = 46.4388, GNorm = 1.6536, lr_0 = 8.2196e-04
Loss = 4.7708e-01, PNorm = 46.4550, GNorm = 1.1234, lr_0 = 8.2140e-04
Loss = 4.7502e-01, PNorm = 46.4863, GNorm = 1.7108, lr_0 = 8.2084e-04
Loss = 5.4390e-01, PNorm = 46.5161, GNorm = 1.4249, lr_0 = 8.2027e-04
Loss = 4.6024e-01, PNorm = 46.5347, GNorm = 1.7821, lr_0 = 8.1971e-04
Loss = 4.7446e-01, PNorm = 46.5427, GNorm = 2.3579, lr_0 = 8.1915e-04
Loss = 4.7652e-01, PNorm = 46.5544, GNorm = 1.5593, lr_0 = 8.1859e-04
Loss = 4.8420e-01, PNorm = 46.5756, GNorm = 2.7551, lr_0 = 8.1803e-04
Loss = 4.5890e-01, PNorm = 46.5906, GNorm = 1.4551, lr_0 = 8.1747e-04
Loss = 4.2232e-01, PNorm = 46.6101, GNorm = 3.9548, lr_0 = 8.1691e-04
Loss = 5.1372e-01, PNorm = 46.6371, GNorm = 1.5015, lr_0 = 8.1635e-04
Loss = 5.2158e-01, PNorm = 46.6526, GNorm = 1.4445, lr_0 = 8.1579e-04
Loss = 4.9660e-01, PNorm = 46.6665, GNorm = 1.7062, lr_0 = 8.1523e-04
Loss = 4.4111e-01, PNorm = 46.6878, GNorm = 0.9552, lr_0 = 8.1467e-04
Loss = 4.6334e-01, PNorm = 46.7110, GNorm = 1.4693, lr_0 = 8.1411e-04
Loss = 4.6593e-01, PNorm = 46.7293, GNorm = 1.2126, lr_0 = 8.1355e-04
Loss = 4.9442e-01, PNorm = 46.7451, GNorm = 1.5435, lr_0 = 8.1300e-04
Loss = 4.8188e-01, PNorm = 46.7570, GNorm = 1.8633, lr_0 = 8.1244e-04
Loss = 4.8722e-01, PNorm = 46.7769, GNorm = 2.6700, lr_0 = 8.1188e-04
Loss = 4.6943e-01, PNorm = 46.7951, GNorm = 2.6123, lr_0 = 8.1133e-04
Loss = 4.7921e-01, PNorm = 46.8198, GNorm = 1.7835, lr_0 = 8.1077e-04
Loss = 5.2742e-01, PNorm = 46.8516, GNorm = 3.1917, lr_0 = 8.1022e-04
Loss = 4.8396e-01, PNorm = 46.8707, GNorm = 0.9288, lr_0 = 8.0966e-04
Loss = 4.9794e-01, PNorm = 46.8855, GNorm = 1.2442, lr_0 = 8.0911e-04
Loss = 4.9888e-01, PNorm = 46.8987, GNorm = 1.5104, lr_0 = 8.0855e-04
Loss = 4.3926e-01, PNorm = 46.9147, GNorm = 1.1265, lr_0 = 8.0800e-04
Loss = 4.6578e-01, PNorm = 46.9345, GNorm = 2.4195, lr_0 = 8.0745e-04
Loss = 5.7786e-01, PNorm = 46.9525, GNorm = 1.2125, lr_0 = 8.0689e-04
Loss = 4.5953e-01, PNorm = 46.9726, GNorm = 1.0442, lr_0 = 8.0634e-04
Loss = 5.3204e-01, PNorm = 47.0032, GNorm = 1.1822, lr_0 = 8.0579e-04
Loss = 4.8865e-01, PNorm = 47.0290, GNorm = 1.4010, lr_0 = 8.0523e-04
Loss = 4.5896e-01, PNorm = 47.0370, GNorm = 1.3972, lr_0 = 8.0468e-04
Loss = 5.2211e-01, PNorm = 47.0631, GNorm = 1.4635, lr_0 = 8.0413e-04
Loss = 4.6557e-01, PNorm = 47.0955, GNorm = 1.8352, lr_0 = 8.0358e-04
Loss = 5.9169e-01, PNorm = 47.1170, GNorm = 2.2894, lr_0 = 8.0303e-04
Loss = 5.0440e-01, PNorm = 47.1337, GNorm = 1.5700, lr_0 = 8.0248e-04
Loss = 5.0034e-01, PNorm = 47.1520, GNorm = 1.2036, lr_0 = 8.0193e-04
Loss = 5.1757e-01, PNorm = 47.1701, GNorm = 1.4877, lr_0 = 8.0138e-04
Loss = 5.8704e-01, PNorm = 47.2002, GNorm = 2.4171, lr_0 = 8.0083e-04
Loss = 5.0231e-01, PNorm = 47.2218, GNorm = 2.5234, lr_0 = 8.0028e-04
Loss = 4.9481e-01, PNorm = 47.2427, GNorm = 1.1612, lr_0 = 7.9974e-04
Loss = 4.4018e-01, PNorm = 47.2515, GNorm = 0.9564, lr_0 = 7.9919e-04
Loss = 4.4845e-01, PNorm = 47.2663, GNorm = 1.1876, lr_0 = 7.9864e-04
Loss = 4.6643e-01, PNorm = 47.2795, GNorm = 1.4043, lr_0 = 7.9809e-04
Loss = 5.0698e-01, PNorm = 47.2967, GNorm = 1.6622, lr_0 = 7.9755e-04
Loss = 4.9251e-01, PNorm = 47.3097, GNorm = 1.7542, lr_0 = 7.9700e-04
Loss = 5.3683e-01, PNorm = 47.3329, GNorm = 1.3724, lr_0 = 7.9645e-04
Loss = 4.7349e-01, PNorm = 47.3521, GNorm = 1.4166, lr_0 = 7.9591e-04
Loss = 4.5737e-01, PNorm = 47.3682, GNorm = 1.8379, lr_0 = 7.9536e-04
Loss = 5.3973e-01, PNorm = 47.3869, GNorm = 1.4672, lr_0 = 7.9482e-04
Loss = 4.8775e-01, PNorm = 47.4058, GNorm = 1.2315, lr_0 = 7.9427e-04
Loss = 4.7879e-01, PNorm = 47.4246, GNorm = 0.8841, lr_0 = 7.9373e-04
Loss = 4.5791e-01, PNorm = 47.4403, GNorm = 2.8067, lr_0 = 7.9319e-04
Loss = 4.8550e-01, PNorm = 47.4539, GNorm = 1.2796, lr_0 = 7.9264e-04
Loss = 4.8460e-01, PNorm = 47.4666, GNorm = 1.7786, lr_0 = 7.9210e-04
Loss = 5.5525e-01, PNorm = 47.4868, GNorm = 1.0604, lr_0 = 7.9156e-04
Loss = 5.0615e-01, PNorm = 47.5052, GNorm = 1.0274, lr_0 = 7.9101e-04
Loss = 4.3778e-01, PNorm = 47.5184, GNorm = 1.1910, lr_0 = 7.9047e-04
Loss = 4.2545e-01, PNorm = 47.5332, GNorm = 0.9997, lr_0 = 7.8993e-04
Loss = 5.2230e-01, PNorm = 47.5515, GNorm = 1.3200, lr_0 = 7.8939e-04
Loss = 5.1501e-01, PNorm = 47.5730, GNorm = 1.2453, lr_0 = 7.8885e-04
Loss = 4.6911e-01, PNorm = 47.5941, GNorm = 1.9653, lr_0 = 7.8831e-04
Loss = 4.4256e-01, PNorm = 47.6096, GNorm = 2.4175, lr_0 = 7.8777e-04
Loss = 5.2101e-01, PNorm = 47.6213, GNorm = 2.3133, lr_0 = 7.8723e-04
Loss = 4.5992e-01, PNorm = 47.6383, GNorm = 1.1279, lr_0 = 7.8669e-04
Loss = 5.0263e-01, PNorm = 47.6513, GNorm = 1.1793, lr_0 = 7.8615e-04
Loss = 4.8338e-01, PNorm = 47.6714, GNorm = 1.5860, lr_0 = 7.8561e-04
Loss = 4.5253e-01, PNorm = 47.6908, GNorm = 0.8154, lr_0 = 7.8507e-04
Loss = 5.1554e-01, PNorm = 47.7017, GNorm = 1.7582, lr_0 = 7.8454e-04
Loss = 5.1205e-01, PNorm = 47.7232, GNorm = 0.9649, lr_0 = 7.8400e-04
Loss = 4.5499e-01, PNorm = 47.7412, GNorm = 1.3860, lr_0 = 7.8346e-04
Loss = 4.4814e-01, PNorm = 47.7422, GNorm = 1.2032, lr_0 = 7.8293e-04
Loss = 4.8370e-01, PNorm = 47.7555, GNorm = 1.1845, lr_0 = 7.8239e-04
Loss = 4.6581e-01, PNorm = 47.7779, GNorm = 1.7988, lr_0 = 7.8185e-04
Loss = 4.3400e-01, PNorm = 47.7915, GNorm = 1.2609, lr_0 = 7.8132e-04
Validation mae = 0.120412
Epoch 5
Loss = 4.2214e-01, PNorm = 47.8073, GNorm = 1.1787, lr_0 = 7.8078e-04
Loss = 4.9456e-01, PNorm = 47.8294, GNorm = 1.6478, lr_0 = 7.8025e-04
Loss = 5.3199e-01, PNorm = 47.8529, GNorm = 1.9727, lr_0 = 7.7971e-04
Loss = 5.6676e-01, PNorm = 47.8801, GNorm = 2.0284, lr_0 = 7.7918e-04
Loss = 5.1496e-01, PNorm = 47.9090, GNorm = 1.3878, lr_0 = 7.7864e-04
Loss = 4.7765e-01, PNorm = 47.9242, GNorm = 1.6616, lr_0 = 7.7811e-04
Loss = 4.6978e-01, PNorm = 47.9417, GNorm = 2.3462, lr_0 = 7.7758e-04
Loss = 3.8805e-01, PNorm = 47.9590, GNorm = 0.9167, lr_0 = 7.7705e-04
Loss = 4.9931e-01, PNorm = 47.9855, GNorm = 1.3460, lr_0 = 7.7651e-04
Loss = 5.2481e-01, PNorm = 48.0046, GNorm = 1.9008, lr_0 = 7.7598e-04
Loss = 4.3752e-01, PNorm = 48.0323, GNorm = 1.2454, lr_0 = 7.7545e-04
Loss = 4.6165e-01, PNorm = 48.0532, GNorm = 1.7909, lr_0 = 7.7492e-04
Loss = 4.3824e-01, PNorm = 48.0596, GNorm = 1.1615, lr_0 = 7.7439e-04
Loss = 4.9724e-01, PNorm = 48.0683, GNorm = 2.0766, lr_0 = 7.7386e-04
Loss = 4.1408e-01, PNorm = 48.0936, GNorm = 1.5600, lr_0 = 7.7333e-04
Loss = 4.6210e-01, PNorm = 48.1049, GNorm = 1.9179, lr_0 = 7.7280e-04
Loss = 4.9211e-01, PNorm = 48.1269, GNorm = 1.2640, lr_0 = 7.7227e-04
Loss = 4.8825e-01, PNorm = 48.1397, GNorm = 1.6917, lr_0 = 7.7174e-04
Loss = 4.1249e-01, PNorm = 48.1576, GNorm = 0.9527, lr_0 = 7.7121e-04
Loss = 4.5273e-01, PNorm = 48.1782, GNorm = 0.9548, lr_0 = 7.7068e-04
Loss = 5.1311e-01, PNorm = 48.1909, GNorm = 1.3258, lr_0 = 7.7015e-04
Loss = 4.1708e-01, PNorm = 48.2125, GNorm = 1.1090, lr_0 = 7.6963e-04
Loss = 4.7366e-01, PNorm = 48.2299, GNorm = 1.1662, lr_0 = 7.6910e-04
Loss = 4.5923e-01, PNorm = 48.2460, GNorm = 1.3504, lr_0 = 7.6857e-04
Loss = 4.3437e-01, PNorm = 48.2560, GNorm = 1.2136, lr_0 = 7.6805e-04
Loss = 5.0403e-01, PNorm = 48.2692, GNorm = 1.1379, lr_0 = 7.6752e-04
Loss = 4.9471e-01, PNorm = 48.2876, GNorm = 1.9594, lr_0 = 7.6699e-04
Loss = 4.7197e-01, PNorm = 48.3075, GNorm = 1.1164, lr_0 = 7.6647e-04
Loss = 4.3555e-01, PNorm = 48.3259, GNorm = 1.7286, lr_0 = 7.6594e-04
Loss = 5.1567e-01, PNorm = 48.3423, GNorm = 2.6987, lr_0 = 7.6542e-04
Loss = 4.3814e-01, PNorm = 48.3672, GNorm = 1.7671, lr_0 = 7.6489e-04
Loss = 4.6063e-01, PNorm = 48.3887, GNorm = 1.4180, lr_0 = 7.6437e-04
Loss = 5.0889e-01, PNorm = 48.4110, GNorm = 1.6989, lr_0 = 7.6385e-04
Loss = 4.8505e-01, PNorm = 48.4256, GNorm = 1.3563, lr_0 = 7.6332e-04
Loss = 4.8019e-01, PNorm = 48.4417, GNorm = 1.7690, lr_0 = 7.6280e-04
Loss = 4.1761e-01, PNorm = 48.4501, GNorm = 1.8766, lr_0 = 7.6228e-04
Loss = 4.9211e-01, PNorm = 48.4671, GNorm = 2.2154, lr_0 = 7.6176e-04
Loss = 5.1566e-01, PNorm = 48.4791, GNorm = 1.5082, lr_0 = 7.6123e-04
Loss = 4.9050e-01, PNorm = 48.5016, GNorm = 1.5658, lr_0 = 7.6071e-04
Loss = 4.9626e-01, PNorm = 48.5246, GNorm = 1.8495, lr_0 = 7.6019e-04
Loss = 4.8384e-01, PNorm = 48.5416, GNorm = 1.4582, lr_0 = 7.5967e-04
Loss = 4.6189e-01, PNorm = 48.5581, GNorm = 1.2284, lr_0 = 7.5915e-04
Loss = 4.0374e-01, PNorm = 48.5732, GNorm = 1.0942, lr_0 = 7.5863e-04
Loss = 5.1487e-01, PNorm = 48.5882, GNorm = 1.6726, lr_0 = 7.5811e-04
Loss = 4.2238e-01, PNorm = 48.6131, GNorm = 1.3026, lr_0 = 7.5759e-04
Loss = 4.3049e-01, PNorm = 48.6291, GNorm = 1.2981, lr_0 = 7.5707e-04
Loss = 4.4439e-01, PNorm = 48.6340, GNorm = 1.0746, lr_0 = 7.5655e-04
Loss = 4.5108e-01, PNorm = 48.6454, GNorm = 0.9604, lr_0 = 7.5603e-04
Loss = 4.2278e-01, PNorm = 48.6625, GNorm = 1.1403, lr_0 = 7.5552e-04
Loss = 4.3427e-01, PNorm = 48.6770, GNorm = 1.5285, lr_0 = 7.5500e-04
Loss = 4.7565e-01, PNorm = 48.6987, GNorm = 1.0616, lr_0 = 7.5448e-04
Loss = 4.5388e-01, PNorm = 48.7178, GNorm = 1.5084, lr_0 = 7.5397e-04
Loss = 5.1448e-01, PNorm = 48.7352, GNorm = 1.4461, lr_0 = 7.5345e-04
Loss = 4.6389e-01, PNorm = 48.7565, GNorm = 2.1509, lr_0 = 7.5293e-04
Loss = 5.1404e-01, PNorm = 48.7783, GNorm = 2.0008, lr_0 = 7.5242e-04
Loss = 5.2054e-01, PNorm = 48.7877, GNorm = 1.4893, lr_0 = 7.5190e-04
Loss = 5.0276e-01, PNorm = 48.8037, GNorm = 0.8985, lr_0 = 7.5139e-04
Loss = 4.3674e-01, PNorm = 48.8229, GNorm = 1.3465, lr_0 = 7.5087e-04
Loss = 4.4359e-01, PNorm = 48.8355, GNorm = 2.3015, lr_0 = 7.5036e-04
Loss = 4.3288e-01, PNorm = 48.8487, GNorm = 1.5101, lr_0 = 7.4984e-04
Loss = 3.8210e-01, PNorm = 48.8636, GNorm = 1.0879, lr_0 = 7.4933e-04
Loss = 4.4404e-01, PNorm = 48.8789, GNorm = 1.2636, lr_0 = 7.4882e-04
Loss = 5.1067e-01, PNorm = 48.8986, GNorm = 1.9745, lr_0 = 7.4830e-04
Loss = 4.4824e-01, PNorm = 48.9146, GNorm = 2.7607, lr_0 = 7.4779e-04
Loss = 4.5880e-01, PNorm = 48.9361, GNorm = 1.7700, lr_0 = 7.4728e-04
Loss = 4.6445e-01, PNorm = 48.9590, GNorm = 1.4976, lr_0 = 7.4677e-04
Loss = 5.0096e-01, PNorm = 48.9805, GNorm = 1.0033, lr_0 = 7.4625e-04
Loss = 4.7870e-01, PNorm = 49.0022, GNorm = 1.8119, lr_0 = 7.4574e-04
Loss = 5.0467e-01, PNorm = 49.0216, GNorm = 1.2848, lr_0 = 7.4523e-04
Loss = 5.0428e-01, PNorm = 49.0413, GNorm = 1.0996, lr_0 = 7.4472e-04
Loss = 4.3702e-01, PNorm = 49.0571, GNorm = 2.0340, lr_0 = 7.4421e-04
Loss = 4.4361e-01, PNorm = 49.0655, GNorm = 1.6439, lr_0 = 7.4370e-04
Loss = 4.3215e-01, PNorm = 49.0825, GNorm = 1.9372, lr_0 = 7.4319e-04
Loss = 4.7791e-01, PNorm = 49.1070, GNorm = 1.2419, lr_0 = 7.4268e-04
Loss = 4.3946e-01, PNorm = 49.1196, GNorm = 1.5969, lr_0 = 7.4217e-04
Loss = 4.3405e-01, PNorm = 49.1428, GNorm = 1.3143, lr_0 = 7.4167e-04
Loss = 4.2342e-01, PNorm = 49.1649, GNorm = 1.1438, lr_0 = 7.4116e-04
Loss = 4.4468e-01, PNorm = 49.1820, GNorm = 1.9934, lr_0 = 7.4065e-04
Loss = 4.3973e-01, PNorm = 49.1873, GNorm = 1.4635, lr_0 = 7.4014e-04
Loss = 4.6103e-01, PNorm = 49.1979, GNorm = 1.6243, lr_0 = 7.3964e-04
Loss = 4.5278e-01, PNorm = 49.2155, GNorm = 1.5635, lr_0 = 7.3913e-04
Loss = 4.5270e-01, PNorm = 49.2310, GNorm = 1.8686, lr_0 = 7.3862e-04
Loss = 4.9227e-01, PNorm = 49.2465, GNorm = 1.5417, lr_0 = 7.3812e-04
Loss = 4.8093e-01, PNorm = 49.2682, GNorm = 2.3801, lr_0 = 7.3761e-04
Loss = 4.6148e-01, PNorm = 49.2901, GNorm = 1.7278, lr_0 = 7.3711e-04
Loss = 4.8896e-01, PNorm = 49.3139, GNorm = 1.5336, lr_0 = 7.3660e-04
Loss = 4.8690e-01, PNorm = 49.3317, GNorm = 1.7840, lr_0 = 7.3610e-04
Loss = 4.3921e-01, PNorm = 49.3455, GNorm = 1.2066, lr_0 = 7.3559e-04
Loss = 4.4084e-01, PNorm = 49.3673, GNorm = 1.9240, lr_0 = 7.3509e-04
Loss = 4.6819e-01, PNorm = 49.3872, GNorm = 1.6620, lr_0 = 7.3458e-04
Loss = 5.3429e-01, PNorm = 49.4015, GNorm = 1.3434, lr_0 = 7.3408e-04
Loss = 4.5270e-01, PNorm = 49.4205, GNorm = 1.1294, lr_0 = 7.3358e-04
Loss = 4.7749e-01, PNorm = 49.4375, GNorm = 2.4463, lr_0 = 7.3308e-04
Loss = 5.1186e-01, PNorm = 49.4483, GNorm = 1.2919, lr_0 = 7.3257e-04
Loss = 4.5769e-01, PNorm = 49.4579, GNorm = 1.7082, lr_0 = 7.3207e-04
Loss = 5.1406e-01, PNorm = 49.4783, GNorm = 1.5996, lr_0 = 7.3157e-04
Loss = 4.9634e-01, PNorm = 49.5060, GNorm = 2.0339, lr_0 = 7.3107e-04
Loss = 5.0968e-01, PNorm = 49.5229, GNorm = 1.0394, lr_0 = 7.3057e-04
Loss = 4.4109e-01, PNorm = 49.5391, GNorm = 2.2049, lr_0 = 7.3007e-04
Loss = 4.1638e-01, PNorm = 49.5625, GNorm = 1.2861, lr_0 = 7.2957e-04
Loss = 5.0994e-01, PNorm = 49.5709, GNorm = 3.8883, lr_0 = 7.2907e-04
Loss = 4.5327e-01, PNorm = 49.5822, GNorm = 1.2485, lr_0 = 7.2857e-04
Loss = 4.4632e-01, PNorm = 49.6032, GNorm = 2.0701, lr_0 = 7.2807e-04
Loss = 4.5756e-01, PNorm = 49.6231, GNorm = 1.5730, lr_0 = 7.2757e-04
Loss = 5.0244e-01, PNorm = 49.6328, GNorm = 1.0074, lr_0 = 7.2707e-04
Loss = 5.5173e-01, PNorm = 49.6496, GNorm = 2.2382, lr_0 = 7.2657e-04
Loss = 3.6858e-01, PNorm = 49.6717, GNorm = 0.8744, lr_0 = 7.2608e-04
Loss = 4.9938e-01, PNorm = 49.6866, GNorm = 1.5044, lr_0 = 7.2558e-04
Loss = 4.7251e-01, PNorm = 49.7007, GNorm = 1.0621, lr_0 = 7.2508e-04
Loss = 5.1379e-01, PNorm = 49.7193, GNorm = 1.4705, lr_0 = 7.2458e-04
Loss = 4.5741e-01, PNorm = 49.7319, GNorm = 1.3317, lr_0 = 7.2409e-04
Loss = 5.2263e-01, PNorm = 49.7554, GNorm = 1.7470, lr_0 = 7.2359e-04
Loss = 4.7454e-01, PNorm = 49.7657, GNorm = 1.2408, lr_0 = 7.2310e-04
Loss = 5.3462e-01, PNorm = 49.7825, GNorm = 1.0181, lr_0 = 7.2260e-04
Loss = 5.0335e-01, PNorm = 49.8070, GNorm = 1.1664, lr_0 = 7.2211e-04
Loss = 4.3058e-01, PNorm = 49.8221, GNorm = 1.2399, lr_0 = 7.2161e-04
Loss = 4.5060e-01, PNorm = 49.8368, GNorm = 1.3561, lr_0 = 7.2112e-04
Loss = 4.3827e-01, PNorm = 49.8513, GNorm = 1.8770, lr_0 = 7.2062e-04
Loss = 4.8102e-01, PNorm = 49.8670, GNorm = 1.7509, lr_0 = 7.2013e-04
Loss = 4.5815e-01, PNorm = 49.8820, GNorm = 1.4462, lr_0 = 7.1964e-04
Validation mae = 0.119483
Epoch 6
Loss = 4.0633e-01, PNorm = 49.8979, GNorm = 1.2454, lr_0 = 7.1914e-04
Loss = 4.0171e-01, PNorm = 49.9117, GNorm = 1.2969, lr_0 = 7.1865e-04
Loss = 4.3848e-01, PNorm = 49.9231, GNorm = 1.9726, lr_0 = 7.1816e-04
Loss = 4.7051e-01, PNorm = 49.9430, GNorm = 1.9954, lr_0 = 7.1767e-04
Loss = 4.4324e-01, PNorm = 49.9560, GNorm = 1.3926, lr_0 = 7.1717e-04
Loss = 4.6816e-01, PNorm = 49.9774, GNorm = 1.2540, lr_0 = 7.1668e-04
Loss = 4.1345e-01, PNorm = 49.9960, GNorm = 1.3526, lr_0 = 7.1619e-04
Loss = 4.2133e-01, PNorm = 50.0188, GNorm = 1.9836, lr_0 = 7.1570e-04
Loss = 5.0983e-01, PNorm = 50.0322, GNorm = 1.1300, lr_0 = 7.1521e-04
Loss = 4.3227e-01, PNorm = 50.0408, GNorm = 1.3491, lr_0 = 7.1472e-04
Loss = 4.8775e-01, PNorm = 50.0577, GNorm = 2.0953, lr_0 = 7.1423e-04
Loss = 4.6229e-01, PNorm = 50.0748, GNorm = 1.1210, lr_0 = 7.1374e-04
Loss = 4.5804e-01, PNorm = 50.0976, GNorm = 1.0459, lr_0 = 7.1325e-04
Loss = 4.3693e-01, PNorm = 50.1124, GNorm = 1.1171, lr_0 = 7.1277e-04
Loss = 4.2698e-01, PNorm = 50.1243, GNorm = 1.5057, lr_0 = 7.1228e-04
Loss = 4.0748e-01, PNorm = 50.1342, GNorm = 0.9242, lr_0 = 7.1179e-04
Loss = 5.0059e-01, PNorm = 50.1496, GNorm = 0.9042, lr_0 = 7.1130e-04
Loss = 4.4428e-01, PNorm = 50.1611, GNorm = 1.1931, lr_0 = 7.1081e-04
Loss = 4.2806e-01, PNorm = 50.1689, GNorm = 1.6763, lr_0 = 7.1033e-04
Loss = 4.4636e-01, PNorm = 50.1878, GNorm = 1.2995, lr_0 = 7.0984e-04
Loss = 4.7345e-01, PNorm = 50.2001, GNorm = 1.8158, lr_0 = 7.0935e-04
Loss = 4.0715e-01, PNorm = 50.2147, GNorm = 1.7529, lr_0 = 7.0887e-04
Loss = 4.5998e-01, PNorm = 50.2299, GNorm = 1.3970, lr_0 = 7.0838e-04
Loss = 4.6060e-01, PNorm = 50.2546, GNorm = 1.7458, lr_0 = 7.0790e-04
Loss = 4.4363e-01, PNorm = 50.2692, GNorm = 1.4899, lr_0 = 7.0741e-04
Loss = 4.8240e-01, PNorm = 50.2898, GNorm = 1.7105, lr_0 = 7.0693e-04
Loss = 4.6455e-01, PNorm = 50.3089, GNorm = 2.4651, lr_0 = 7.0644e-04
Loss = 4.1674e-01, PNorm = 50.3198, GNorm = 1.0198, lr_0 = 7.0596e-04
Loss = 4.5059e-01, PNorm = 50.3357, GNorm = 1.6572, lr_0 = 7.0548e-04
Loss = 5.1097e-01, PNorm = 50.3541, GNorm = 1.0856, lr_0 = 7.0499e-04
Loss = 5.0387e-01, PNorm = 50.3697, GNorm = 1.5461, lr_0 = 7.0451e-04
Loss = 3.9444e-01, PNorm = 50.3904, GNorm = 1.3935, lr_0 = 7.0403e-04
Loss = 5.2282e-01, PNorm = 50.4045, GNorm = 2.1350, lr_0 = 7.0354e-04
Loss = 4.5082e-01, PNorm = 50.4191, GNorm = 1.9112, lr_0 = 7.0306e-04
Loss = 4.7622e-01, PNorm = 50.4298, GNorm = 1.0733, lr_0 = 7.0258e-04
Loss = 4.3970e-01, PNorm = 50.4447, GNorm = 1.2867, lr_0 = 7.0210e-04
Loss = 4.0886e-01, PNorm = 50.4612, GNorm = 1.2280, lr_0 = 7.0162e-04
Loss = 4.4871e-01, PNorm = 50.4785, GNorm = 1.3788, lr_0 = 7.0114e-04
Loss = 4.5309e-01, PNorm = 50.4924, GNorm = 1.3375, lr_0 = 7.0066e-04
Loss = 4.7537e-01, PNorm = 50.5048, GNorm = 2.2967, lr_0 = 7.0018e-04
Loss = 4.4257e-01, PNorm = 50.5170, GNorm = 1.0792, lr_0 = 6.9970e-04
Loss = 4.7466e-01, PNorm = 50.5338, GNorm = 1.5495, lr_0 = 6.9922e-04
Loss = 4.4431e-01, PNorm = 50.5474, GNorm = 1.8043, lr_0 = 6.9874e-04
Loss = 4.4833e-01, PNorm = 50.5574, GNorm = 2.0910, lr_0 = 6.9826e-04
Loss = 4.6224e-01, PNorm = 50.5699, GNorm = 3.0282, lr_0 = 6.9778e-04
Loss = 4.1759e-01, PNorm = 50.5818, GNorm = 1.2982, lr_0 = 6.9730e-04
Loss = 3.8684e-01, PNorm = 50.5963, GNorm = 1.1689, lr_0 = 6.9683e-04
Loss = 4.3384e-01, PNorm = 50.6093, GNorm = 1.2347, lr_0 = 6.9635e-04
Loss = 4.4122e-01, PNorm = 50.6243, GNorm = 1.5172, lr_0 = 6.9587e-04
Loss = 4.5930e-01, PNorm = 50.6427, GNorm = 1.3439, lr_0 = 6.9540e-04
Loss = 4.4397e-01, PNorm = 50.6575, GNorm = 1.1688, lr_0 = 6.9492e-04
Loss = 4.1633e-01, PNorm = 50.6706, GNorm = 0.9816, lr_0 = 6.9444e-04
Loss = 4.5131e-01, PNorm = 50.6894, GNorm = 1.1759, lr_0 = 6.9397e-04
Loss = 5.1263e-01, PNorm = 50.7027, GNorm = 1.6808, lr_0 = 6.9349e-04
Loss = 4.6222e-01, PNorm = 50.7190, GNorm = 1.1441, lr_0 = 6.9302e-04
Loss = 5.0344e-01, PNorm = 50.7363, GNorm = 1.3114, lr_0 = 6.9254e-04
Loss = 4.5102e-01, PNorm = 50.7532, GNorm = 0.9108, lr_0 = 6.9207e-04
Loss = 4.4966e-01, PNorm = 50.7594, GNorm = 1.2662, lr_0 = 6.9159e-04
Loss = 4.2734e-01, PNorm = 50.7711, GNorm = 1.6045, lr_0 = 6.9112e-04
Loss = 4.5961e-01, PNorm = 50.7854, GNorm = 1.0979, lr_0 = 6.9065e-04
Loss = 4.6694e-01, PNorm = 50.7978, GNorm = 1.3815, lr_0 = 6.9017e-04
Loss = 4.2574e-01, PNorm = 50.8063, GNorm = 1.1962, lr_0 = 6.8970e-04
Loss = 4.0616e-01, PNorm = 50.8120, GNorm = 0.9339, lr_0 = 6.8923e-04
Loss = 4.3152e-01, PNorm = 50.8264, GNorm = 2.0981, lr_0 = 6.8876e-04
Loss = 5.1809e-01, PNorm = 50.8445, GNorm = 1.1515, lr_0 = 6.8828e-04
Loss = 4.8459e-01, PNorm = 50.8541, GNorm = 1.7193, lr_0 = 6.8781e-04
Loss = 4.5581e-01, PNorm = 50.8695, GNorm = 1.0229, lr_0 = 6.8734e-04
Loss = 4.4859e-01, PNorm = 50.8878, GNorm = 1.1679, lr_0 = 6.8687e-04
Loss = 4.9912e-01, PNorm = 50.9060, GNorm = 2.2623, lr_0 = 6.8640e-04
Loss = 5.4428e-01, PNorm = 50.9322, GNorm = 1.6230, lr_0 = 6.8593e-04
Loss = 5.0067e-01, PNorm = 50.9557, GNorm = 1.4839, lr_0 = 6.8546e-04
Loss = 4.6113e-01, PNorm = 50.9645, GNorm = 1.3716, lr_0 = 6.8499e-04
Loss = 4.5104e-01, PNorm = 50.9771, GNorm = 1.4690, lr_0 = 6.8452e-04
Loss = 4.8733e-01, PNorm = 50.9903, GNorm = 3.5553, lr_0 = 6.8405e-04
Loss = 4.5911e-01, PNorm = 51.0057, GNorm = 1.3523, lr_0 = 6.8358e-04
Loss = 4.8306e-01, PNorm = 51.0244, GNorm = 1.4291, lr_0 = 6.8312e-04
Loss = 4.5711e-01, PNorm = 51.0425, GNorm = 2.1127, lr_0 = 6.8265e-04
Loss = 4.8553e-01, PNorm = 51.0706, GNorm = 1.9590, lr_0 = 6.8218e-04
Loss = 4.6906e-01, PNorm = 51.0859, GNorm = 1.0386, lr_0 = 6.8171e-04
Loss = 4.4909e-01, PNorm = 51.0995, GNorm = 1.2702, lr_0 = 6.8125e-04
Loss = 4.6011e-01, PNorm = 51.1194, GNorm = 1.2607, lr_0 = 6.8078e-04
Loss = 4.6294e-01, PNorm = 51.1345, GNorm = 1.4330, lr_0 = 6.8031e-04
Loss = 4.6397e-01, PNorm = 51.1475, GNorm = 1.1213, lr_0 = 6.7985e-04
Loss = 5.0008e-01, PNorm = 51.1626, GNorm = 2.0406, lr_0 = 6.7938e-04
Loss = 4.8706e-01, PNorm = 51.1787, GNorm = 1.7591, lr_0 = 6.7892e-04
Loss = 4.5544e-01, PNorm = 51.1951, GNorm = 0.9198, lr_0 = 6.7845e-04
Loss = 4.4932e-01, PNorm = 51.2123, GNorm = 1.4722, lr_0 = 6.7799e-04
Loss = 4.3851e-01, PNorm = 51.2322, GNorm = 1.1569, lr_0 = 6.7752e-04
Loss = 4.1969e-01, PNorm = 51.2418, GNorm = 1.2955, lr_0 = 6.7706e-04
Loss = 4.5907e-01, PNorm = 51.2562, GNorm = 0.9077, lr_0 = 6.7659e-04
Loss = 3.8823e-01, PNorm = 51.2673, GNorm = 1.2274, lr_0 = 6.7613e-04
Loss = 4.3841e-01, PNorm = 51.2721, GNorm = 1.0449, lr_0 = 6.7567e-04
Loss = 4.6874e-01, PNorm = 51.2832, GNorm = 1.2798, lr_0 = 6.7520e-04
Loss = 5.2191e-01, PNorm = 51.2974, GNorm = 1.4759, lr_0 = 6.7474e-04
Loss = 4.8976e-01, PNorm = 51.3213, GNorm = 1.7010, lr_0 = 6.7428e-04
Loss = 3.9527e-01, PNorm = 51.3343, GNorm = 1.7443, lr_0 = 6.7382e-04
Loss = 4.6280e-01, PNorm = 51.3409, GNorm = 1.2347, lr_0 = 6.7335e-04
Loss = 5.0935e-01, PNorm = 51.3520, GNorm = 2.0057, lr_0 = 6.7289e-04
Loss = 4.4217e-01, PNorm = 51.3729, GNorm = 0.8529, lr_0 = 6.7243e-04
Loss = 4.5630e-01, PNorm = 51.3883, GNorm = 2.2083, lr_0 = 6.7197e-04
Loss = 4.3827e-01, PNorm = 51.3984, GNorm = 1.0133, lr_0 = 6.7151e-04
Loss = 4.8055e-01, PNorm = 51.4090, GNorm = 1.0836, lr_0 = 6.7105e-04
Loss = 4.3550e-01, PNorm = 51.4230, GNorm = 1.6198, lr_0 = 6.7059e-04
Loss = 4.8664e-01, PNorm = 51.4386, GNorm = 1.4599, lr_0 = 6.7013e-04
Loss = 5.0901e-01, PNorm = 51.4488, GNorm = 1.2946, lr_0 = 6.6967e-04
Loss = 5.1490e-01, PNorm = 51.4631, GNorm = 2.3351, lr_0 = 6.6921e-04
Loss = 3.8598e-01, PNorm = 51.4793, GNorm = 1.2325, lr_0 = 6.6876e-04
Loss = 4.3586e-01, PNorm = 51.4915, GNorm = 1.4156, lr_0 = 6.6830e-04
Loss = 4.8865e-01, PNorm = 51.5041, GNorm = 0.9286, lr_0 = 6.6784e-04
Loss = 4.3842e-01, PNorm = 51.5159, GNorm = 1.1663, lr_0 = 6.6738e-04
Loss = 3.9549e-01, PNorm = 51.5319, GNorm = 1.0805, lr_0 = 6.6693e-04
Loss = 4.4585e-01, PNorm = 51.5355, GNorm = 1.0724, lr_0 = 6.6647e-04
Loss = 5.1916e-01, PNorm = 51.5413, GNorm = 1.1788, lr_0 = 6.6601e-04
Loss = 4.5597e-01, PNorm = 51.5525, GNorm = 1.2115, lr_0 = 6.6556e-04
Loss = 4.2057e-01, PNorm = 51.5646, GNorm = 1.4332, lr_0 = 6.6510e-04
Loss = 4.4823e-01, PNorm = 51.5742, GNorm = 1.8738, lr_0 = 6.6464e-04
Loss = 4.8590e-01, PNorm = 51.5894, GNorm = 1.0082, lr_0 = 6.6419e-04
Loss = 4.3786e-01, PNorm = 51.6050, GNorm = 1.4622, lr_0 = 6.6373e-04
Loss = 4.7800e-01, PNorm = 51.6243, GNorm = 1.6276, lr_0 = 6.6328e-04
Loss = 4.6901e-01, PNorm = 51.6441, GNorm = 1.7862, lr_0 = 6.6282e-04
Validation mae = 0.118327
Epoch 7
Loss = 4.6725e-01, PNorm = 51.6602, GNorm = 1.7617, lr_0 = 6.6237e-04
Loss = 4.7142e-01, PNorm = 51.6709, GNorm = 2.4473, lr_0 = 6.6192e-04
Loss = 4.7209e-01, PNorm = 51.6891, GNorm = 1.8892, lr_0 = 6.6146e-04
Loss = 4.7114e-01, PNorm = 51.7070, GNorm = 1.7114, lr_0 = 6.6101e-04
Loss = 4.4903e-01, PNorm = 51.7201, GNorm = 1.2807, lr_0 = 6.6056e-04
Loss = 5.0747e-01, PNorm = 51.7325, GNorm = 1.4706, lr_0 = 6.6011e-04
Loss = 4.7185e-01, PNorm = 51.7478, GNorm = 1.5486, lr_0 = 6.5965e-04
Loss = 4.7748e-01, PNorm = 51.7640, GNorm = 1.7851, lr_0 = 6.5920e-04
Loss = 4.8744e-01, PNorm = 51.7819, GNorm = 1.3865, lr_0 = 6.5875e-04
Loss = 3.9456e-01, PNorm = 51.8032, GNorm = 1.1005, lr_0 = 6.5830e-04
Loss = 4.1415e-01, PNorm = 51.8190, GNorm = 1.8509, lr_0 = 6.5785e-04
Loss = 4.4432e-01, PNorm = 51.8364, GNorm = 1.6669, lr_0 = 6.5740e-04
Loss = 4.3939e-01, PNorm = 51.8535, GNorm = 1.1336, lr_0 = 6.5695e-04
Loss = 4.6065e-01, PNorm = 51.8708, GNorm = 1.2257, lr_0 = 6.5650e-04
Loss = 5.1541e-01, PNorm = 51.8915, GNorm = 1.1567, lr_0 = 6.5605e-04
Loss = 4.3932e-01, PNorm = 51.9031, GNorm = 1.2891, lr_0 = 6.5560e-04
Loss = 4.8185e-01, PNorm = 51.9122, GNorm = 1.7188, lr_0 = 6.5515e-04
Loss = 4.5785e-01, PNorm = 51.9257, GNorm = 1.2173, lr_0 = 6.5470e-04
Loss = 4.1084e-01, PNorm = 51.9478, GNorm = 1.4291, lr_0 = 6.5425e-04
Loss = 4.4946e-01, PNorm = 51.9568, GNorm = 1.0287, lr_0 = 6.5380e-04
Loss = 4.6614e-01, PNorm = 51.9677, GNorm = 1.1570, lr_0 = 6.5335e-04
Loss = 4.8451e-01, PNorm = 51.9819, GNorm = 1.4402, lr_0 = 6.5291e-04
Loss = 4.6222e-01, PNorm = 51.9991, GNorm = 0.9416, lr_0 = 6.5246e-04
Loss = 3.7555e-01, PNorm = 52.0083, GNorm = 1.3633, lr_0 = 6.5201e-04
Loss = 4.4997e-01, PNorm = 52.0121, GNorm = 1.0881, lr_0 = 6.5157e-04
Loss = 3.8085e-01, PNorm = 52.0254, GNorm = 1.1403, lr_0 = 6.5112e-04
Loss = 4.5024e-01, PNorm = 52.0391, GNorm = 1.3832, lr_0 = 6.5067e-04
Loss = 4.5947e-01, PNorm = 52.0482, GNorm = 1.8108, lr_0 = 6.5023e-04
Loss = 4.6686e-01, PNorm = 52.0660, GNorm = 1.2233, lr_0 = 6.4978e-04
Loss = 4.5797e-01, PNorm = 52.0870, GNorm = 1.7024, lr_0 = 6.4934e-04
Loss = 4.0391e-01, PNorm = 52.1012, GNorm = 1.5795, lr_0 = 6.4889e-04
Loss = 4.2142e-01, PNorm = 52.1148, GNorm = 1.2766, lr_0 = 6.4845e-04
Loss = 4.2306e-01, PNorm = 52.1352, GNorm = 2.0894, lr_0 = 6.4800e-04
Loss = 4.3594e-01, PNorm = 52.1493, GNorm = 1.5286, lr_0 = 6.4756e-04
Loss = 3.9454e-01, PNorm = 52.1630, GNorm = 1.1202, lr_0 = 6.4712e-04
Loss = 5.2301e-01, PNorm = 52.1781, GNorm = 1.4252, lr_0 = 6.4667e-04
Loss = 4.4121e-01, PNorm = 52.1896, GNorm = 1.3724, lr_0 = 6.4623e-04
Loss = 4.1992e-01, PNorm = 52.2077, GNorm = 1.1671, lr_0 = 6.4579e-04
Loss = 3.9836e-01, PNorm = 52.2190, GNorm = 1.2161, lr_0 = 6.4534e-04
Loss = 3.9685e-01, PNorm = 52.2316, GNorm = 1.4233, lr_0 = 6.4490e-04
Loss = 4.1849e-01, PNorm = 52.2437, GNorm = 1.3993, lr_0 = 6.4446e-04
Loss = 4.6132e-01, PNorm = 52.2515, GNorm = 1.3063, lr_0 = 6.4402e-04
Loss = 4.2281e-01, PNorm = 52.2673, GNorm = 2.3203, lr_0 = 6.4358e-04
Loss = 4.5980e-01, PNorm = 52.2725, GNorm = 1.9854, lr_0 = 6.4314e-04
Loss = 4.5775e-01, PNorm = 52.2819, GNorm = 1.7126, lr_0 = 6.4270e-04
Loss = 4.8472e-01, PNorm = 52.3024, GNorm = 1.5930, lr_0 = 6.4226e-04
Loss = 4.4649e-01, PNorm = 52.3111, GNorm = 1.2388, lr_0 = 6.4182e-04
Loss = 3.9190e-01, PNorm = 52.3226, GNorm = 1.0598, lr_0 = 6.4138e-04
Loss = 4.0912e-01, PNorm = 52.3388, GNorm = 1.5815, lr_0 = 6.4094e-04
Loss = 4.0175e-01, PNorm = 52.3577, GNorm = 1.3238, lr_0 = 6.4050e-04
Loss = 4.8602e-01, PNorm = 52.3720, GNorm = 2.6301, lr_0 = 6.4006e-04
Loss = 3.9515e-01, PNorm = 52.3906, GNorm = 1.6928, lr_0 = 6.3962e-04
Loss = 4.6730e-01, PNorm = 52.4032, GNorm = 1.4364, lr_0 = 6.3918e-04
Loss = 4.2894e-01, PNorm = 52.4128, GNorm = 1.2964, lr_0 = 6.3874e-04
Loss = 4.3866e-01, PNorm = 52.4212, GNorm = 1.5500, lr_0 = 6.3831e-04
Loss = 4.3329e-01, PNorm = 52.4370, GNorm = 0.9966, lr_0 = 6.3787e-04
Loss = 4.6498e-01, PNorm = 52.4544, GNorm = 1.2820, lr_0 = 6.3743e-04
Loss = 4.3080e-01, PNorm = 52.4686, GNorm = 1.2617, lr_0 = 6.3700e-04
Loss = 4.2773e-01, PNorm = 52.4734, GNorm = 1.5167, lr_0 = 6.3656e-04
Loss = 4.3239e-01, PNorm = 52.4818, GNorm = 1.6375, lr_0 = 6.3612e-04
Loss = 4.9240e-01, PNorm = 52.4909, GNorm = 1.4247, lr_0 = 6.3569e-04
Loss = 4.3714e-01, PNorm = 52.5063, GNorm = 1.8128, lr_0 = 6.3525e-04
Loss = 4.3340e-01, PNorm = 52.5173, GNorm = 1.4394, lr_0 = 6.3482e-04
Loss = 4.6719e-01, PNorm = 52.5354, GNorm = 1.3868, lr_0 = 6.3438e-04
Loss = 4.1713e-01, PNorm = 52.5463, GNorm = 1.1573, lr_0 = 6.3395e-04
Loss = 4.1665e-01, PNorm = 52.5517, GNorm = 1.2380, lr_0 = 6.3351e-04
Loss = 4.6180e-01, PNorm = 52.5634, GNorm = 1.6173, lr_0 = 6.3308e-04
Loss = 4.9587e-01, PNorm = 52.5733, GNorm = 1.1523, lr_0 = 6.3265e-04
Loss = 4.1716e-01, PNorm = 52.5865, GNorm = 2.4730, lr_0 = 6.3221e-04
Loss = 4.0115e-01, PNorm = 52.5941, GNorm = 1.7523, lr_0 = 6.3178e-04
Loss = 4.1242e-01, PNorm = 52.6060, GNorm = 1.2808, lr_0 = 6.3135e-04
Loss = 4.4869e-01, PNorm = 52.6198, GNorm = 1.2646, lr_0 = 6.3091e-04
Loss = 4.6950e-01, PNorm = 52.6299, GNorm = 1.5568, lr_0 = 6.3048e-04
Loss = 4.2681e-01, PNorm = 52.6496, GNorm = 2.0247, lr_0 = 6.3005e-04
Loss = 3.8202e-01, PNorm = 52.6628, GNorm = 0.8918, lr_0 = 6.2962e-04
Loss = 4.5652e-01, PNorm = 52.6811, GNorm = 0.9435, lr_0 = 6.2919e-04
Loss = 4.0813e-01, PNorm = 52.6935, GNorm = 1.1477, lr_0 = 6.2876e-04
Loss = 5.1689e-01, PNorm = 52.7076, GNorm = 1.2060, lr_0 = 6.2833e-04
Loss = 4.5078e-01, PNorm = 52.7184, GNorm = 1.2354, lr_0 = 6.2789e-04
Loss = 4.1076e-01, PNorm = 52.7311, GNorm = 2.5077, lr_0 = 6.2746e-04
Loss = 4.5325e-01, PNorm = 52.7447, GNorm = 0.9726, lr_0 = 6.2703e-04
Loss = 4.1909e-01, PNorm = 52.7607, GNorm = 2.1861, lr_0 = 6.2661e-04
Loss = 4.4650e-01, PNorm = 52.7755, GNorm = 1.8200, lr_0 = 6.2618e-04
Loss = 4.4658e-01, PNorm = 52.7893, GNorm = 1.8416, lr_0 = 6.2575e-04
Loss = 4.2841e-01, PNorm = 52.7979, GNorm = 1.4300, lr_0 = 6.2532e-04
Loss = 4.3142e-01, PNorm = 52.8134, GNorm = 1.8526, lr_0 = 6.2489e-04
Loss = 4.5903e-01, PNorm = 52.8240, GNorm = 0.9659, lr_0 = 6.2446e-04
Loss = 4.7886e-01, PNorm = 52.8402, GNorm = 1.1359, lr_0 = 6.2403e-04
Loss = 4.3991e-01, PNorm = 52.8538, GNorm = 1.3010, lr_0 = 6.2361e-04
Loss = 4.6223e-01, PNorm = 52.8605, GNorm = 1.4495, lr_0 = 6.2318e-04
Loss = 4.0709e-01, PNorm = 52.8765, GNorm = 1.0868, lr_0 = 6.2275e-04
Loss = 4.1950e-01, PNorm = 52.8883, GNorm = 1.1393, lr_0 = 6.2233e-04
Loss = 4.2432e-01, PNorm = 52.8970, GNorm = 1.3936, lr_0 = 6.2190e-04
Loss = 4.5317e-01, PNorm = 52.9103, GNorm = 1.7979, lr_0 = 6.2147e-04
Loss = 4.5920e-01, PNorm = 52.9241, GNorm = 1.0988, lr_0 = 6.2105e-04
Loss = 4.8334e-01, PNorm = 52.9407, GNorm = 1.5497, lr_0 = 6.2062e-04
Loss = 4.6420e-01, PNorm = 52.9535, GNorm = 1.8288, lr_0 = 6.2020e-04
Loss = 4.9540e-01, PNorm = 52.9645, GNorm = 2.0207, lr_0 = 6.1977e-04
Loss = 4.2754e-01, PNorm = 52.9694, GNorm = 1.0776, lr_0 = 6.1935e-04
Loss = 4.7041e-01, PNorm = 52.9784, GNorm = 1.4670, lr_0 = 6.1892e-04
Loss = 4.8425e-01, PNorm = 52.9905, GNorm = 1.0412, lr_0 = 6.1850e-04
Loss = 4.3464e-01, PNorm = 52.9922, GNorm = 1.7213, lr_0 = 6.1808e-04
Loss = 4.9068e-01, PNorm = 53.0040, GNorm = 1.8087, lr_0 = 6.1765e-04
Loss = 5.0785e-01, PNorm = 53.0217, GNorm = 2.2777, lr_0 = 6.1723e-04
Loss = 4.4778e-01, PNorm = 53.0452, GNorm = 2.0025, lr_0 = 6.1681e-04
Loss = 4.2248e-01, PNorm = 53.0610, GNorm = 1.8516, lr_0 = 6.1638e-04
Loss = 4.4171e-01, PNorm = 53.0714, GNorm = 1.7755, lr_0 = 6.1596e-04
Loss = 4.2382e-01, PNorm = 53.0855, GNorm = 0.9960, lr_0 = 6.1554e-04
Loss = 5.3191e-01, PNorm = 53.0936, GNorm = 2.0368, lr_0 = 6.1512e-04
Loss = 5.0970e-01, PNorm = 53.1032, GNorm = 1.6919, lr_0 = 6.1470e-04
Loss = 4.8030e-01, PNorm = 53.1184, GNorm = 1.7606, lr_0 = 6.1428e-04
Loss = 4.3194e-01, PNorm = 53.1392, GNorm = 1.4277, lr_0 = 6.1385e-04
Loss = 4.8419e-01, PNorm = 53.1538, GNorm = 1.7057, lr_0 = 6.1343e-04
Loss = 4.7128e-01, PNorm = 53.1621, GNorm = 1.9233, lr_0 = 6.1301e-04
Loss = 4.4426e-01, PNorm = 53.1712, GNorm = 1.5350, lr_0 = 6.1259e-04
Loss = 4.2689e-01, PNorm = 53.1725, GNorm = 1.4419, lr_0 = 6.1217e-04
Loss = 4.1076e-01, PNorm = 53.1821, GNorm = 1.3414, lr_0 = 6.1175e-04
Loss = 4.2492e-01, PNorm = 53.1952, GNorm = 1.6937, lr_0 = 6.1134e-04
Loss = 3.9315e-01, PNorm = 53.2005, GNorm = 1.1649, lr_0 = 6.1092e-04
Loss = 4.2771e-01, PNorm = 53.2090, GNorm = 1.5203, lr_0 = 6.1050e-04
Validation mae = 0.118227
Epoch 8
Loss = 4.2655e-01, PNorm = 53.2209, GNorm = 1.2229, lr_0 = 6.1008e-04
Loss = 4.1865e-01, PNorm = 53.2373, GNorm = 1.0197, lr_0 = 6.0966e-04
Loss = 4.9055e-01, PNorm = 53.2517, GNorm = 1.1266, lr_0 = 6.0924e-04
Loss = 4.7416e-01, PNorm = 53.2631, GNorm = 1.4588, lr_0 = 6.0883e-04
Loss = 4.0880e-01, PNorm = 53.2772, GNorm = 1.6768, lr_0 = 6.0841e-04
Loss = 4.1922e-01, PNorm = 53.2889, GNorm = 1.3189, lr_0 = 6.0799e-04
Loss = 3.5204e-01, PNorm = 53.2993, GNorm = 1.4868, lr_0 = 6.0758e-04
Loss = 4.8233e-01, PNorm = 53.3074, GNorm = 1.7679, lr_0 = 6.0716e-04
Loss = 4.1730e-01, PNorm = 53.3249, GNorm = 2.5463, lr_0 = 6.0674e-04
Loss = 4.1146e-01, PNorm = 53.3415, GNorm = 1.0290, lr_0 = 6.0633e-04
Loss = 4.1397e-01, PNorm = 53.3582, GNorm = 1.3757, lr_0 = 6.0591e-04
Loss = 3.6746e-01, PNorm = 53.3726, GNorm = 0.8445, lr_0 = 6.0550e-04
Loss = 4.0995e-01, PNorm = 53.3857, GNorm = 1.8710, lr_0 = 6.0508e-04
Loss = 3.8839e-01, PNorm = 53.3940, GNorm = 1.6719, lr_0 = 6.0467e-04
Loss = 4.4611e-01, PNorm = 53.4097, GNorm = 1.2263, lr_0 = 6.0425e-04
Loss = 3.9749e-01, PNorm = 53.4218, GNorm = 1.3961, lr_0 = 6.0384e-04
Loss = 4.3784e-01, PNorm = 53.4351, GNorm = 1.2274, lr_0 = 6.0343e-04
Loss = 4.1798e-01, PNorm = 53.4416, GNorm = 1.4074, lr_0 = 6.0301e-04
Loss = 4.3152e-01, PNorm = 53.4507, GNorm = 2.3569, lr_0 = 6.0260e-04
Loss = 4.5974e-01, PNorm = 53.4588, GNorm = 1.3276, lr_0 = 6.0219e-04
Loss = 4.2796e-01, PNorm = 53.4744, GNorm = 1.5252, lr_0 = 6.0178e-04
Loss = 4.4178e-01, PNorm = 53.4863, GNorm = 1.3432, lr_0 = 6.0136e-04
Loss = 4.4333e-01, PNorm = 53.5013, GNorm = 3.1649, lr_0 = 6.0095e-04
Loss = 4.2918e-01, PNorm = 53.5137, GNorm = 1.0886, lr_0 = 6.0054e-04
Loss = 4.4814e-01, PNorm = 53.5266, GNorm = 1.5559, lr_0 = 6.0013e-04
Loss = 4.1070e-01, PNorm = 53.5364, GNorm = 1.2149, lr_0 = 5.9972e-04
Loss = 4.6970e-01, PNorm = 53.5436, GNorm = 1.2341, lr_0 = 5.9931e-04
Loss = 4.7668e-01, PNorm = 53.5517, GNorm = 1.6852, lr_0 = 5.9890e-04
Loss = 4.8642e-01, PNorm = 53.5659, GNorm = 1.9939, lr_0 = 5.9849e-04
Loss = 4.2826e-01, PNorm = 53.5763, GNorm = 1.5451, lr_0 = 5.9808e-04
Loss = 4.3099e-01, PNorm = 53.5955, GNorm = 1.8733, lr_0 = 5.9767e-04
Loss = 3.8308e-01, PNorm = 53.6072, GNorm = 1.4901, lr_0 = 5.9726e-04
Loss = 4.4912e-01, PNorm = 53.6195, GNorm = 1.4994, lr_0 = 5.9685e-04
Loss = 4.0983e-01, PNorm = 53.6330, GNorm = 1.9286, lr_0 = 5.9644e-04
Loss = 5.0595e-01, PNorm = 53.6478, GNorm = 1.0916, lr_0 = 5.9603e-04
Loss = 4.1103e-01, PNorm = 53.6585, GNorm = 1.1867, lr_0 = 5.9562e-04
Loss = 4.0023e-01, PNorm = 53.6645, GNorm = 1.7973, lr_0 = 5.9521e-04
Loss = 4.2187e-01, PNorm = 53.6755, GNorm = 1.0391, lr_0 = 5.9481e-04
Loss = 4.4474e-01, PNorm = 53.6798, GNorm = 1.3373, lr_0 = 5.9440e-04
Loss = 4.2764e-01, PNorm = 53.6883, GNorm = 1.3094, lr_0 = 5.9399e-04
Loss = 3.9980e-01, PNorm = 53.6934, GNorm = 1.3522, lr_0 = 5.9358e-04
Loss = 5.3202e-01, PNorm = 53.7049, GNorm = 1.7107, lr_0 = 5.9318e-04
Loss = 4.2187e-01, PNorm = 53.7211, GNorm = 1.8827, lr_0 = 5.9277e-04
Loss = 4.8526e-01, PNorm = 53.7390, GNorm = 2.5084, lr_0 = 5.9236e-04
Loss = 4.3210e-01, PNorm = 53.7541, GNorm = 1.4949, lr_0 = 5.9196e-04
Loss = 4.0964e-01, PNorm = 53.7789, GNorm = 0.9192, lr_0 = 5.9155e-04
Loss = 4.1801e-01, PNorm = 53.8001, GNorm = 1.0802, lr_0 = 5.9115e-04
Loss = 4.5785e-01, PNorm = 53.8089, GNorm = 1.2428, lr_0 = 5.9074e-04
Loss = 3.9377e-01, PNorm = 53.8224, GNorm = 1.6056, lr_0 = 5.9034e-04
Loss = 4.5575e-01, PNorm = 53.8282, GNorm = 1.4798, lr_0 = 5.8993e-04
Loss = 3.8160e-01, PNorm = 53.8405, GNorm = 0.8943, lr_0 = 5.8953e-04
Loss = 3.9626e-01, PNorm = 53.8510, GNorm = 1.2997, lr_0 = 5.8913e-04
Loss = 4.1102e-01, PNorm = 53.8637, GNorm = 0.7891, lr_0 = 5.8872e-04
Loss = 4.3528e-01, PNorm = 53.8717, GNorm = 1.1255, lr_0 = 5.8832e-04
Loss = 4.3118e-01, PNorm = 53.8820, GNorm = 1.4021, lr_0 = 5.8792e-04
Loss = 3.8522e-01, PNorm = 53.8872, GNorm = 1.2362, lr_0 = 5.8751e-04
Loss = 4.0279e-01, PNorm = 53.8981, GNorm = 1.0322, lr_0 = 5.8711e-04
Loss = 4.6428e-01, PNorm = 53.9093, GNorm = 1.4929, lr_0 = 5.8671e-04
Loss = 3.9049e-01, PNorm = 53.9248, GNorm = 1.1001, lr_0 = 5.8631e-04
Loss = 4.9813e-01, PNorm = 53.9402, GNorm = 1.3050, lr_0 = 5.8591e-04
Loss = 4.0814e-01, PNorm = 53.9504, GNorm = 1.1197, lr_0 = 5.8550e-04
Loss = 3.9142e-01, PNorm = 53.9526, GNorm = 1.2100, lr_0 = 5.8510e-04
Loss = 4.2984e-01, PNorm = 53.9580, GNorm = 1.8419, lr_0 = 5.8470e-04
Loss = 3.5469e-01, PNorm = 53.9678, GNorm = 1.3896, lr_0 = 5.8430e-04
Loss = 4.8100e-01, PNorm = 53.9780, GNorm = 1.4752, lr_0 = 5.8390e-04
Loss = 4.1801e-01, PNorm = 53.9996, GNorm = 2.4902, lr_0 = 5.8350e-04
Loss = 5.4175e-01, PNorm = 54.0181, GNorm = 1.3329, lr_0 = 5.8310e-04
Loss = 4.0777e-01, PNorm = 54.0385, GNorm = 1.2232, lr_0 = 5.8270e-04
Loss = 4.3731e-01, PNorm = 54.0545, GNorm = 1.2835, lr_0 = 5.8230e-04
Loss = 4.0257e-01, PNorm = 54.0642, GNorm = 1.0822, lr_0 = 5.8190e-04
Loss = 4.2146e-01, PNorm = 54.0768, GNorm = 2.4130, lr_0 = 5.8151e-04
Loss = 4.3771e-01, PNorm = 54.0875, GNorm = 1.0703, lr_0 = 5.8111e-04
Loss = 4.0778e-01, PNorm = 54.1006, GNorm = 2.6499, lr_0 = 5.8071e-04
Loss = 4.7866e-01, PNorm = 54.1085, GNorm = 1.7074, lr_0 = 5.8031e-04
Loss = 4.0202e-01, PNorm = 54.1179, GNorm = 1.0807, lr_0 = 5.7991e-04
Loss = 3.8139e-01, PNorm = 54.1212, GNorm = 1.2964, lr_0 = 5.7952e-04
Loss = 4.6370e-01, PNorm = 54.1339, GNorm = 1.3367, lr_0 = 5.7912e-04
Loss = 4.9656e-01, PNorm = 54.1516, GNorm = 2.0116, lr_0 = 5.7872e-04
Loss = 4.8316e-01, PNorm = 54.1557, GNorm = 1.0206, lr_0 = 5.7833e-04
Loss = 4.2475e-01, PNorm = 54.1737, GNorm = 1.9213, lr_0 = 5.7793e-04
Loss = 4.3936e-01, PNorm = 54.1901, GNorm = 1.4540, lr_0 = 5.7753e-04
Loss = 4.0970e-01, PNorm = 54.2017, GNorm = 1.2857, lr_0 = 5.7714e-04
Loss = 4.4255e-01, PNorm = 54.2132, GNorm = 1.4936, lr_0 = 5.7674e-04
Loss = 5.0258e-01, PNorm = 54.2285, GNorm = 1.2395, lr_0 = 5.7635e-04
Loss = 3.5951e-01, PNorm = 54.2422, GNorm = 1.1425, lr_0 = 5.7595e-04
Loss = 4.6102e-01, PNorm = 54.2541, GNorm = 1.5261, lr_0 = 5.7556e-04
Loss = 4.2615e-01, PNorm = 54.2611, GNorm = 1.2157, lr_0 = 5.7516e-04
Loss = 3.8334e-01, PNorm = 54.2656, GNorm = 1.5734, lr_0 = 5.7477e-04
Loss = 4.4332e-01, PNorm = 54.2693, GNorm = 1.5107, lr_0 = 5.7438e-04
Loss = 5.0750e-01, PNorm = 54.2758, GNorm = 2.1844, lr_0 = 5.7398e-04
Loss = 4.5330e-01, PNorm = 54.2910, GNorm = 1.3528, lr_0 = 5.7359e-04
Loss = 4.5089e-01, PNorm = 54.3030, GNorm = 1.5823, lr_0 = 5.7320e-04
Loss = 4.6809e-01, PNorm = 54.3149, GNorm = 1.3723, lr_0 = 5.7280e-04
Loss = 4.6826e-01, PNorm = 54.3267, GNorm = 1.2721, lr_0 = 5.7241e-04
Loss = 4.3309e-01, PNorm = 54.3296, GNorm = 1.5062, lr_0 = 5.7202e-04
Loss = 4.4794e-01, PNorm = 54.3438, GNorm = 1.3799, lr_0 = 5.7163e-04
Loss = 4.5442e-01, PNorm = 54.3541, GNorm = 1.3410, lr_0 = 5.7124e-04
Loss = 4.0617e-01, PNorm = 54.3671, GNorm = 1.7908, lr_0 = 5.7084e-04
Loss = 4.2989e-01, PNorm = 54.3750, GNorm = 1.0129, lr_0 = 5.7045e-04
Loss = 4.1225e-01, PNorm = 54.3888, GNorm = 1.0386, lr_0 = 5.7006e-04
Loss = 4.4364e-01, PNorm = 54.3973, GNorm = 1.2860, lr_0 = 5.6967e-04
Loss = 3.9932e-01, PNorm = 54.4054, GNorm = 1.6625, lr_0 = 5.6928e-04
Loss = 4.4296e-01, PNorm = 54.4087, GNorm = 2.3974, lr_0 = 5.6889e-04
Loss = 4.0994e-01, PNorm = 54.4195, GNorm = 1.1463, lr_0 = 5.6850e-04
Loss = 4.8267e-01, PNorm = 54.4290, GNorm = 1.7033, lr_0 = 5.6811e-04
Loss = 3.8616e-01, PNorm = 54.4409, GNorm = 0.9374, lr_0 = 5.6772e-04
Loss = 4.0800e-01, PNorm = 54.4571, GNorm = 1.4674, lr_0 = 5.6733e-04
Loss = 4.0870e-01, PNorm = 54.4723, GNorm = 1.9408, lr_0 = 5.6695e-04
Loss = 4.3022e-01, PNorm = 54.4830, GNorm = 1.2138, lr_0 = 5.6656e-04
Loss = 4.3084e-01, PNorm = 54.4959, GNorm = 1.8425, lr_0 = 5.6617e-04
Loss = 4.6334e-01, PNorm = 54.5047, GNorm = 1.3610, lr_0 = 5.6578e-04
Loss = 4.0206e-01, PNorm = 54.5106, GNorm = 1.3120, lr_0 = 5.6539e-04
Loss = 4.4833e-01, PNorm = 54.5182, GNorm = 1.3847, lr_0 = 5.6501e-04
Loss = 3.5705e-01, PNorm = 54.5271, GNorm = 1.1317, lr_0 = 5.6462e-04
Loss = 3.7940e-01, PNorm = 54.5308, GNorm = 1.1722, lr_0 = 5.6423e-04
Loss = 4.0152e-01, PNorm = 54.5352, GNorm = 2.3305, lr_0 = 5.6385e-04
Loss = 4.0530e-01, PNorm = 54.5468, GNorm = 1.0783, lr_0 = 5.6346e-04
Loss = 4.5686e-01, PNorm = 54.5567, GNorm = 1.2578, lr_0 = 5.6307e-04
Loss = 4.5975e-01, PNorm = 54.5705, GNorm = 2.1032, lr_0 = 5.6269e-04
Loss = 4.9010e-01, PNorm = 54.5819, GNorm = 1.5063, lr_0 = 5.6230e-04
Validation mae = 0.118033
Epoch 9
Loss = 4.1435e-01, PNorm = 54.5959, GNorm = 1.0332, lr_0 = 5.6192e-04
Loss = 4.1980e-01, PNorm = 54.6094, GNorm = 0.9643, lr_0 = 5.6153e-04
Loss = 4.1219e-01, PNorm = 54.6257, GNorm = 1.4819, lr_0 = 5.6115e-04
Loss = 4.3602e-01, PNorm = 54.6401, GNorm = 0.9902, lr_0 = 5.6076e-04
Loss = 4.8897e-01, PNorm = 54.6466, GNorm = 1.6000, lr_0 = 5.6038e-04
Loss = 4.2386e-01, PNorm = 54.6617, GNorm = 1.7291, lr_0 = 5.6000e-04
Loss = 4.1974e-01, PNorm = 54.6798, GNorm = 1.0850, lr_0 = 5.5961e-04
Loss = 4.1352e-01, PNorm = 54.6951, GNorm = 1.1018, lr_0 = 5.5923e-04
Loss = 4.3400e-01, PNorm = 54.7116, GNorm = 1.7253, lr_0 = 5.5885e-04
Loss = 3.8499e-01, PNorm = 54.7199, GNorm = 0.9902, lr_0 = 5.5846e-04
Loss = 3.9073e-01, PNorm = 54.7335, GNorm = 2.0190, lr_0 = 5.5808e-04
Loss = 4.2934e-01, PNorm = 54.7435, GNorm = 1.2731, lr_0 = 5.5770e-04
Loss = 4.7562e-01, PNorm = 54.7595, GNorm = 1.1900, lr_0 = 5.5732e-04
Loss = 4.8606e-01, PNorm = 54.7674, GNorm = 1.3271, lr_0 = 5.5693e-04
Loss = 4.0388e-01, PNorm = 54.7731, GNorm = 1.2115, lr_0 = 5.5655e-04
Loss = 4.2241e-01, PNorm = 54.7851, GNorm = 1.4475, lr_0 = 5.5617e-04
Loss = 4.0014e-01, PNorm = 54.7929, GNorm = 1.3812, lr_0 = 5.5579e-04
Loss = 3.9738e-01, PNorm = 54.8067, GNorm = 1.1774, lr_0 = 5.5541e-04
Loss = 3.9644e-01, PNorm = 54.8178, GNorm = 1.2650, lr_0 = 5.5503e-04
Loss = 4.0954e-01, PNorm = 54.8230, GNorm = 1.2823, lr_0 = 5.5465e-04
Loss = 4.6498e-01, PNorm = 54.8363, GNorm = 1.8232, lr_0 = 5.5427e-04
Loss = 4.4799e-01, PNorm = 54.8525, GNorm = 0.9405, lr_0 = 5.5389e-04
Loss = 4.5601e-01, PNorm = 54.8651, GNorm = 1.5532, lr_0 = 5.5351e-04
Loss = 4.1885e-01, PNorm = 54.8733, GNorm = 2.0138, lr_0 = 5.5313e-04
Loss = 4.5337e-01, PNorm = 54.8815, GNorm = 2.0560, lr_0 = 5.5275e-04
Loss = 4.2965e-01, PNorm = 54.8936, GNorm = 1.2625, lr_0 = 5.5237e-04
Loss = 3.8611e-01, PNorm = 54.9025, GNorm = 1.2316, lr_0 = 5.5199e-04
Loss = 3.7798e-01, PNorm = 54.9095, GNorm = 1.6954, lr_0 = 5.5162e-04
Loss = 3.9684e-01, PNorm = 54.9150, GNorm = 1.0206, lr_0 = 5.5124e-04
Loss = 3.9760e-01, PNorm = 54.9245, GNorm = 1.5099, lr_0 = 5.5086e-04
Loss = 4.0566e-01, PNorm = 54.9338, GNorm = 0.9814, lr_0 = 5.5048e-04
Loss = 3.7943e-01, PNorm = 54.9389, GNorm = 1.9539, lr_0 = 5.5011e-04
Loss = 3.8305e-01, PNorm = 54.9548, GNorm = 1.0292, lr_0 = 5.4973e-04
Loss = 4.4849e-01, PNorm = 54.9721, GNorm = 1.4619, lr_0 = 5.4935e-04
Loss = 4.5831e-01, PNorm = 54.9838, GNorm = 1.3896, lr_0 = 5.4898e-04
Loss = 4.1467e-01, PNorm = 54.9890, GNorm = 1.1754, lr_0 = 5.4860e-04
Loss = 4.3736e-01, PNorm = 54.9973, GNorm = 1.0321, lr_0 = 5.4822e-04
Loss = 3.7619e-01, PNorm = 55.0097, GNorm = 1.3910, lr_0 = 5.4785e-04
Loss = 3.8820e-01, PNorm = 55.0163, GNorm = 0.8043, lr_0 = 5.4747e-04
Loss = 4.0016e-01, PNorm = 55.0270, GNorm = 1.5747, lr_0 = 5.4710e-04
Loss = 4.5307e-01, PNorm = 55.0364, GNorm = 1.4394, lr_0 = 5.4672e-04
Loss = 4.3669e-01, PNorm = 55.0465, GNorm = 1.6451, lr_0 = 5.4635e-04
Loss = 4.1580e-01, PNorm = 55.0567, GNorm = 1.3864, lr_0 = 5.4597e-04
Loss = 4.4516e-01, PNorm = 55.0749, GNorm = 1.2252, lr_0 = 5.4560e-04
Loss = 4.1258e-01, PNorm = 55.0844, GNorm = 1.1811, lr_0 = 5.4523e-04
Loss = 4.3703e-01, PNorm = 55.0918, GNorm = 2.7809, lr_0 = 5.4485e-04
Loss = 4.3862e-01, PNorm = 55.0973, GNorm = 2.1202, lr_0 = 5.4448e-04
Loss = 4.3701e-01, PNorm = 55.1078, GNorm = 2.2829, lr_0 = 5.4411e-04
Loss = 4.5096e-01, PNorm = 55.1142, GNorm = 2.2162, lr_0 = 5.4373e-04
Loss = 4.4210e-01, PNorm = 55.1219, GNorm = 1.1183, lr_0 = 5.4336e-04
Loss = 4.8793e-01, PNorm = 55.1358, GNorm = 1.8872, lr_0 = 5.4299e-04
Loss = 4.1046e-01, PNorm = 55.1505, GNorm = 2.6635, lr_0 = 5.4262e-04
Loss = 3.8159e-01, PNorm = 55.1628, GNorm = 1.0064, lr_0 = 5.4225e-04
Loss = 4.1849e-01, PNorm = 55.1698, GNorm = 1.6120, lr_0 = 5.4187e-04
Loss = 4.4739e-01, PNorm = 55.1806, GNorm = 0.8727, lr_0 = 5.4150e-04
Loss = 4.8385e-01, PNorm = 55.1957, GNorm = 1.0426, lr_0 = 5.4113e-04
Loss = 4.5840e-01, PNorm = 55.2138, GNorm = 1.3770, lr_0 = 5.4076e-04
Loss = 3.9642e-01, PNorm = 55.2262, GNorm = 2.1300, lr_0 = 5.4039e-04
Loss = 4.5393e-01, PNorm = 55.2283, GNorm = 1.8864, lr_0 = 5.4002e-04
Loss = 4.2491e-01, PNorm = 55.2406, GNorm = 0.9046, lr_0 = 5.3965e-04
Loss = 3.7159e-01, PNorm = 55.2460, GNorm = 1.0476, lr_0 = 5.3928e-04
Loss = 4.2418e-01, PNorm = 55.2546, GNorm = 1.1703, lr_0 = 5.3891e-04
Loss = 4.1068e-01, PNorm = 55.2565, GNorm = 1.5043, lr_0 = 5.3854e-04
Loss = 4.3946e-01, PNorm = 55.2684, GNorm = 1.8701, lr_0 = 5.3817e-04
Loss = 4.7149e-01, PNorm = 55.2811, GNorm = 2.2812, lr_0 = 5.3781e-04
Loss = 4.9405e-01, PNorm = 55.2976, GNorm = 1.5010, lr_0 = 5.3744e-04
Loss = 4.3103e-01, PNorm = 55.3092, GNorm = 1.7285, lr_0 = 5.3707e-04
Loss = 4.6088e-01, PNorm = 55.3274, GNorm = 1.1999, lr_0 = 5.3670e-04
Loss = 4.2508e-01, PNorm = 55.3442, GNorm = 1.3272, lr_0 = 5.3633e-04
Loss = 4.5232e-01, PNorm = 55.3612, GNorm = 1.4937, lr_0 = 5.3597e-04
Loss = 4.6409e-01, PNorm = 55.3733, GNorm = 1.3775, lr_0 = 5.3560e-04
Loss = 4.4501e-01, PNorm = 55.3812, GNorm = 1.6509, lr_0 = 5.3523e-04
Loss = 3.6096e-01, PNorm = 55.3843, GNorm = 1.1081, lr_0 = 5.3486e-04
Loss = 4.5335e-01, PNorm = 55.3925, GNorm = 1.2223, lr_0 = 5.3450e-04
Loss = 4.1981e-01, PNorm = 55.4031, GNorm = 0.9791, lr_0 = 5.3413e-04
Loss = 3.5704e-01, PNorm = 55.4174, GNorm = 1.5762, lr_0 = 5.3377e-04
Loss = 3.9769e-01, PNorm = 55.4234, GNorm = 1.2176, lr_0 = 5.3340e-04
Loss = 4.1954e-01, PNorm = 55.4287, GNorm = 1.7213, lr_0 = 5.3304e-04
Loss = 4.2294e-01, PNorm = 55.4411, GNorm = 1.1768, lr_0 = 5.3267e-04
Loss = 3.6566e-01, PNorm = 55.4560, GNorm = 1.2655, lr_0 = 5.3231e-04
Loss = 4.3339e-01, PNorm = 55.4666, GNorm = 1.4985, lr_0 = 5.3194e-04
Loss = 4.1068e-01, PNorm = 55.4782, GNorm = 1.4521, lr_0 = 5.3158e-04
Loss = 4.4716e-01, PNorm = 55.4854, GNorm = 1.9114, lr_0 = 5.3121e-04
Loss = 4.5358e-01, PNorm = 55.4951, GNorm = 1.9409, lr_0 = 5.3085e-04
Loss = 4.3850e-01, PNorm = 55.5000, GNorm = 1.0827, lr_0 = 5.3048e-04
Loss = 3.9229e-01, PNorm = 55.5077, GNorm = 0.9076, lr_0 = 5.3012e-04
Loss = 4.2578e-01, PNorm = 55.5161, GNorm = 1.5378, lr_0 = 5.2976e-04
Loss = 4.3145e-01, PNorm = 55.5248, GNorm = 1.6829, lr_0 = 5.2939e-04
Loss = 3.7987e-01, PNorm = 55.5353, GNorm = 2.4023, lr_0 = 5.2903e-04
Loss = 4.0778e-01, PNorm = 55.5470, GNorm = 1.4567, lr_0 = 5.2867e-04
Loss = 4.3324e-01, PNorm = 55.5559, GNorm = 1.5570, lr_0 = 5.2831e-04
Loss = 4.0823e-01, PNorm = 55.5678, GNorm = 1.2509, lr_0 = 5.2795e-04
Loss = 4.4226e-01, PNorm = 55.5742, GNorm = 1.2929, lr_0 = 5.2758e-04
Loss = 3.9291e-01, PNorm = 55.5872, GNorm = 1.3324, lr_0 = 5.2722e-04
Loss = 3.9652e-01, PNorm = 55.5988, GNorm = 1.1605, lr_0 = 5.2686e-04
Loss = 4.3201e-01, PNorm = 55.5984, GNorm = 1.4048, lr_0 = 5.2650e-04
Loss = 4.2378e-01, PNorm = 55.6035, GNorm = 1.6749, lr_0 = 5.2614e-04
Loss = 4.1147e-01, PNorm = 55.6092, GNorm = 1.3091, lr_0 = 5.2578e-04
Loss = 4.4470e-01, PNorm = 55.6198, GNorm = 1.5309, lr_0 = 5.2542e-04
Loss = 4.1720e-01, PNorm = 55.6309, GNorm = 1.3181, lr_0 = 5.2506e-04
Loss = 4.1777e-01, PNorm = 55.6362, GNorm = 1.9392, lr_0 = 5.2470e-04
Loss = 4.3885e-01, PNorm = 55.6437, GNorm = 1.5615, lr_0 = 5.2434e-04
Loss = 4.0263e-01, PNorm = 55.6508, GNorm = 1.1085, lr_0 = 5.2398e-04
Loss = 4.4718e-01, PNorm = 55.6612, GNorm = 1.1122, lr_0 = 5.2362e-04
Loss = 3.4782e-01, PNorm = 55.6700, GNorm = 0.9847, lr_0 = 5.2326e-04
Loss = 4.2138e-01, PNorm = 55.6783, GNorm = 1.9042, lr_0 = 5.2290e-04
Loss = 4.7491e-01, PNorm = 55.6924, GNorm = 1.8642, lr_0 = 5.2255e-04
Loss = 4.6805e-01, PNorm = 55.7041, GNorm = 2.4199, lr_0 = 5.2219e-04
Loss = 4.3112e-01, PNorm = 55.7153, GNorm = 1.3247, lr_0 = 5.2183e-04
Loss = 4.0595e-01, PNorm = 55.7224, GNorm = 1.5646, lr_0 = 5.2147e-04
Loss = 4.0340e-01, PNorm = 55.7349, GNorm = 1.3849, lr_0 = 5.2112e-04
Loss = 4.2657e-01, PNorm = 55.7388, GNorm = 1.1158, lr_0 = 5.2076e-04
Loss = 4.7459e-01, PNorm = 55.7554, GNorm = 1.4655, lr_0 = 5.2040e-04
Loss = 4.0058e-01, PNorm = 55.7635, GNorm = 1.3790, lr_0 = 5.2005e-04
Loss = 4.2510e-01, PNorm = 55.7760, GNorm = 0.9866, lr_0 = 5.1969e-04
Loss = 4.1090e-01, PNorm = 55.7887, GNorm = 1.1501, lr_0 = 5.1933e-04
Loss = 4.1873e-01, PNorm = 55.8015, GNorm = 1.3469, lr_0 = 5.1898e-04
Loss = 4.7721e-01, PNorm = 55.8077, GNorm = 2.3283, lr_0 = 5.1862e-04
Loss = 4.4918e-01, PNorm = 55.8081, GNorm = 1.3275, lr_0 = 5.1827e-04
Loss = 4.2598e-01, PNorm = 55.8195, GNorm = 1.7819, lr_0 = 5.1791e-04
Validation mae = 0.116041
Epoch 10
Loss = 3.9517e-01, PNorm = 55.8356, GNorm = 1.3334, lr_0 = 5.1756e-04
Loss = 4.2612e-01, PNorm = 55.8471, GNorm = 1.0633, lr_0 = 5.1720e-04
Loss = 4.5328e-01, PNorm = 55.8585, GNorm = 1.0875, lr_0 = 5.1685e-04
Loss = 4.1380e-01, PNorm = 55.8705, GNorm = 1.2435, lr_0 = 5.1649e-04
Loss = 4.2671e-01, PNorm = 55.8844, GNorm = 1.2263, lr_0 = 5.1614e-04
Loss = 4.1814e-01, PNorm = 55.8945, GNorm = 1.0818, lr_0 = 5.1579e-04
Loss = 3.9077e-01, PNorm = 55.9011, GNorm = 1.4534, lr_0 = 5.1543e-04
Loss = 3.9700e-01, PNorm = 55.9076, GNorm = 1.1468, lr_0 = 5.1508e-04
Loss = 4.3714e-01, PNorm = 55.9177, GNorm = 2.1600, lr_0 = 5.1473e-04
Loss = 3.7086e-01, PNorm = 55.9299, GNorm = 0.9229, lr_0 = 5.1437e-04
Loss = 4.0934e-01, PNorm = 55.9407, GNorm = 1.3912, lr_0 = 5.1402e-04
Loss = 4.4345e-01, PNorm = 55.9542, GNorm = 2.1226, lr_0 = 5.1367e-04
Loss = 4.0054e-01, PNorm = 55.9574, GNorm = 1.0144, lr_0 = 5.1332e-04
Loss = 4.4527e-01, PNorm = 55.9651, GNorm = 1.4019, lr_0 = 5.1297e-04
Loss = 4.1065e-01, PNorm = 55.9733, GNorm = 1.5974, lr_0 = 5.1262e-04
Loss = 4.1122e-01, PNorm = 55.9812, GNorm = 1.2338, lr_0 = 5.1226e-04
Loss = 4.0724e-01, PNorm = 55.9992, GNorm = 1.0548, lr_0 = 5.1191e-04
Loss = 4.7934e-01, PNorm = 56.0110, GNorm = 1.1915, lr_0 = 5.1156e-04
Loss = 4.5937e-01, PNorm = 56.0217, GNorm = 1.5718, lr_0 = 5.1121e-04
Loss = 4.0237e-01, PNorm = 56.0333, GNorm = 1.0687, lr_0 = 5.1086e-04
Loss = 4.2873e-01, PNorm = 56.0481, GNorm = 1.8892, lr_0 = 5.1051e-04
Loss = 3.7938e-01, PNorm = 56.0577, GNorm = 1.0650, lr_0 = 5.1016e-04
Loss = 4.0724e-01, PNorm = 56.0707, GNorm = 1.4548, lr_0 = 5.0981e-04
Loss = 3.6311e-01, PNorm = 56.0799, GNorm = 1.7411, lr_0 = 5.0946e-04
Loss = 4.3059e-01, PNorm = 56.0899, GNorm = 2.2744, lr_0 = 5.0911e-04
Loss = 3.7412e-01, PNorm = 56.0937, GNorm = 1.8999, lr_0 = 5.0877e-04
Loss = 4.5591e-01, PNorm = 56.1066, GNorm = 2.3604, lr_0 = 5.0842e-04
Loss = 4.5111e-01, PNorm = 56.1182, GNorm = 1.1474, lr_0 = 5.0807e-04
Loss = 4.3930e-01, PNorm = 56.1331, GNorm = 1.1753, lr_0 = 5.0772e-04
Loss = 3.9290e-01, PNorm = 56.1358, GNorm = 2.1319, lr_0 = 5.0737e-04
Loss = 4.2922e-01, PNorm = 56.1449, GNorm = 1.2143, lr_0 = 5.0703e-04
Loss = 4.1720e-01, PNorm = 56.1541, GNorm = 1.8956, lr_0 = 5.0668e-04
Loss = 4.0962e-01, PNorm = 56.1663, GNorm = 1.4526, lr_0 = 5.0633e-04
Loss = 4.1965e-01, PNorm = 56.1677, GNorm = 1.0576, lr_0 = 5.0598e-04
Loss = 3.8401e-01, PNorm = 56.1734, GNorm = 1.0759, lr_0 = 5.0564e-04
Loss = 4.4218e-01, PNorm = 56.1763, GNorm = 1.2516, lr_0 = 5.0529e-04
Loss = 4.1505e-01, PNorm = 56.1897, GNorm = 1.4199, lr_0 = 5.0494e-04
Loss = 4.4484e-01, PNorm = 56.2004, GNorm = 1.2116, lr_0 = 5.0460e-04
Loss = 4.2541e-01, PNorm = 56.2098, GNorm = 1.2045, lr_0 = 5.0425e-04
Loss = 4.3744e-01, PNorm = 56.2135, GNorm = 1.5930, lr_0 = 5.0391e-04
Loss = 4.5528e-01, PNorm = 56.2227, GNorm = 1.5076, lr_0 = 5.0356e-04
Loss = 4.7945e-01, PNorm = 56.2271, GNorm = 1.5218, lr_0 = 5.0322e-04
Loss = 4.1769e-01, PNorm = 56.2425, GNorm = 1.5027, lr_0 = 5.0287e-04
Loss = 4.1868e-01, PNorm = 56.2585, GNorm = 1.5923, lr_0 = 5.0253e-04
Loss = 3.9379e-01, PNorm = 56.2652, GNorm = 1.8931, lr_0 = 5.0218e-04
Loss = 4.3733e-01, PNorm = 56.2711, GNorm = 1.3238, lr_0 = 5.0184e-04
Loss = 4.0070e-01, PNorm = 56.2825, GNorm = 0.9387, lr_0 = 5.0150e-04
Loss = 3.8639e-01, PNorm = 56.2937, GNorm = 1.7719, lr_0 = 5.0115e-04
Loss = 4.9694e-01, PNorm = 56.2972, GNorm = 1.9145, lr_0 = 5.0081e-04
Loss = 4.1900e-01, PNorm = 56.3105, GNorm = 1.5573, lr_0 = 5.0047e-04
Loss = 3.4435e-01, PNorm = 56.3217, GNorm = 1.4689, lr_0 = 5.0012e-04
Loss = 4.6295e-01, PNorm = 56.3263, GNorm = 1.9963, lr_0 = 4.9978e-04
Loss = 4.2234e-01, PNorm = 56.3336, GNorm = 1.1816, lr_0 = 4.9944e-04
Loss = 4.9402e-01, PNorm = 56.3433, GNorm = 1.2335, lr_0 = 4.9910e-04
Loss = 4.2684e-01, PNorm = 56.3473, GNorm = 0.9243, lr_0 = 4.9875e-04
Loss = 4.0794e-01, PNorm = 56.3588, GNorm = 1.2953, lr_0 = 4.9841e-04
Loss = 4.1727e-01, PNorm = 56.3713, GNorm = 1.6286, lr_0 = 4.9807e-04
Loss = 4.2480e-01, PNorm = 56.3810, GNorm = 1.4581, lr_0 = 4.9773e-04
Loss = 3.8879e-01, PNorm = 56.3921, GNorm = 1.2477, lr_0 = 4.9739e-04
Loss = 3.2331e-01, PNorm = 56.4024, GNorm = 0.9239, lr_0 = 4.9705e-04
Loss = 4.3586e-01, PNorm = 56.4136, GNorm = 1.4208, lr_0 = 4.9671e-04
Loss = 4.6499e-01, PNorm = 56.4220, GNorm = 1.1997, lr_0 = 4.9637e-04
Loss = 4.7796e-01, PNorm = 56.4339, GNorm = 2.1078, lr_0 = 4.9603e-04
Loss = 3.7047e-01, PNorm = 56.4422, GNorm = 1.4880, lr_0 = 4.9569e-04
Loss = 3.8754e-01, PNorm = 56.4520, GNorm = 1.6671, lr_0 = 4.9535e-04
Loss = 3.9622e-01, PNorm = 56.4640, GNorm = 1.4160, lr_0 = 4.9501e-04
Loss = 3.6850e-01, PNorm = 56.4680, GNorm = 1.1283, lr_0 = 4.9467e-04
Loss = 4.6807e-01, PNorm = 56.4793, GNorm = 1.6036, lr_0 = 4.9433e-04
Loss = 4.0538e-01, PNorm = 56.4904, GNorm = 1.3566, lr_0 = 4.9399e-04
Loss = 4.1243e-01, PNorm = 56.4910, GNorm = 1.2911, lr_0 = 4.9365e-04
Loss = 4.3858e-01, PNorm = 56.5003, GNorm = 1.4157, lr_0 = 4.9332e-04
Loss = 4.2361e-01, PNorm = 56.5077, GNorm = 1.4673, lr_0 = 4.9298e-04
Loss = 4.1874e-01, PNorm = 56.5156, GNorm = 1.2750, lr_0 = 4.9264e-04
Loss = 4.7250e-01, PNorm = 56.5238, GNorm = 1.9171, lr_0 = 4.9230e-04
Loss = 4.0588e-01, PNorm = 56.5311, GNorm = 1.9480, lr_0 = 4.9197e-04
Loss = 3.7615e-01, PNorm = 56.5457, GNorm = 1.7025, lr_0 = 4.9163e-04
Loss = 4.2755e-01, PNorm = 56.5535, GNorm = 1.2902, lr_0 = 4.9129e-04
Loss = 4.0134e-01, PNorm = 56.5633, GNorm = 1.1456, lr_0 = 4.9095e-04
Loss = 4.2235e-01, PNorm = 56.5693, GNorm = 1.3620, lr_0 = 4.9062e-04
Loss = 3.3123e-01, PNorm = 56.5781, GNorm = 0.9954, lr_0 = 4.9028e-04
Loss = 3.9369e-01, PNorm = 56.5860, GNorm = 1.1869, lr_0 = 4.8995e-04
Loss = 3.9598e-01, PNorm = 56.5930, GNorm = 1.5038, lr_0 = 4.8961e-04
Loss = 4.2579e-01, PNorm = 56.5954, GNorm = 1.7124, lr_0 = 4.8928e-04
Loss = 4.5423e-01, PNorm = 56.5972, GNorm = 1.7278, lr_0 = 4.8894e-04
Loss = 3.9826e-01, PNorm = 56.6060, GNorm = 1.3195, lr_0 = 4.8861e-04
Loss = 3.8607e-01, PNorm = 56.6151, GNorm = 1.7264, lr_0 = 4.8827e-04
Loss = 4.8597e-01, PNorm = 56.6254, GNorm = 1.9135, lr_0 = 4.8794e-04
Loss = 4.6472e-01, PNorm = 56.6365, GNorm = 1.0214, lr_0 = 4.8760e-04
Loss = 4.2078e-01, PNorm = 56.6452, GNorm = 1.3863, lr_0 = 4.8727e-04
Loss = 3.9455e-01, PNorm = 56.6489, GNorm = 1.3875, lr_0 = 4.8693e-04
Loss = 4.0920e-01, PNorm = 56.6581, GNorm = 1.0838, lr_0 = 4.8660e-04
Loss = 4.4887e-01, PNorm = 56.6705, GNorm = 2.7892, lr_0 = 4.8627e-04
Loss = 4.1445e-01, PNorm = 56.6797, GNorm = 1.6094, lr_0 = 4.8593e-04
Loss = 3.4780e-01, PNorm = 56.6815, GNorm = 0.9262, lr_0 = 4.8560e-04
Loss = 4.3098e-01, PNorm = 56.6927, GNorm = 2.3595, lr_0 = 4.8527e-04
Loss = 3.9606e-01, PNorm = 56.7087, GNorm = 2.0817, lr_0 = 4.8494e-04
Loss = 4.6792e-01, PNorm = 56.7156, GNorm = 2.1099, lr_0 = 4.8460e-04
Loss = 4.5491e-01, PNorm = 56.7219, GNorm = 1.7987, lr_0 = 4.8427e-04
Loss = 3.7733e-01, PNorm = 56.7296, GNorm = 1.3960, lr_0 = 4.8394e-04
Loss = 4.5469e-01, PNorm = 56.7377, GNorm = 1.9528, lr_0 = 4.8361e-04
Loss = 3.6902e-01, PNorm = 56.7426, GNorm = 1.8818, lr_0 = 4.8328e-04
Loss = 4.5121e-01, PNorm = 56.7522, GNorm = 1.5592, lr_0 = 4.8295e-04
Loss = 3.6946e-01, PNorm = 56.7646, GNorm = 1.6081, lr_0 = 4.8262e-04
Loss = 4.1759e-01, PNorm = 56.7695, GNorm = 1.4806, lr_0 = 4.8228e-04
Loss = 4.3383e-01, PNorm = 56.7778, GNorm = 1.3218, lr_0 = 4.8195e-04
Loss = 3.7850e-01, PNorm = 56.7833, GNorm = 1.3833, lr_0 = 4.8162e-04
Loss = 3.5380e-01, PNorm = 56.7889, GNorm = 1.3990, lr_0 = 4.8129e-04
Loss = 3.9023e-01, PNorm = 56.8000, GNorm = 0.9106, lr_0 = 4.8096e-04
Loss = 4.6955e-01, PNorm = 56.8090, GNorm = 1.9465, lr_0 = 4.8064e-04
Loss = 4.4872e-01, PNorm = 56.8217, GNorm = 1.2279, lr_0 = 4.8031e-04
Loss = 3.9510e-01, PNorm = 56.8274, GNorm = 1.3905, lr_0 = 4.7998e-04
Loss = 3.8057e-01, PNorm = 56.8374, GNorm = 1.9938, lr_0 = 4.7965e-04
Loss = 4.1579e-01, PNorm = 56.8428, GNorm = 1.4051, lr_0 = 4.7932e-04
Loss = 3.4929e-01, PNorm = 56.8505, GNorm = 1.4684, lr_0 = 4.7899e-04
Loss = 4.0054e-01, PNorm = 56.8549, GNorm = 0.9832, lr_0 = 4.7866e-04
Loss = 4.2381e-01, PNorm = 56.8584, GNorm = 1.1897, lr_0 = 4.7833e-04
Loss = 3.9847e-01, PNorm = 56.8687, GNorm = 1.0462, lr_0 = 4.7801e-04
Loss = 4.2795e-01, PNorm = 56.8766, GNorm = 2.7092, lr_0 = 4.7768e-04
Loss = 3.9339e-01, PNorm = 56.8812, GNorm = 1.3353, lr_0 = 4.7735e-04
Loss = 4.0987e-01, PNorm = 56.8868, GNorm = 1.3361, lr_0 = 4.7703e-04
Validation mae = 0.115945
Epoch 11
Loss = 4.4548e-01, PNorm = 56.8956, GNorm = 1.3700, lr_0 = 4.7670e-04
Loss = 4.4066e-01, PNorm = 56.9022, GNorm = 1.4381, lr_0 = 4.7637e-04
Loss = 4.2690e-01, PNorm = 56.9134, GNorm = 1.1230, lr_0 = 4.7605e-04
Loss = 4.2268e-01, PNorm = 56.9257, GNorm = 1.4417, lr_0 = 4.7572e-04
Loss = 4.3978e-01, PNorm = 56.9427, GNorm = 2.1957, lr_0 = 4.7539e-04
Loss = 4.2745e-01, PNorm = 56.9531, GNorm = 1.9761, lr_0 = 4.7507e-04
Loss = 3.9795e-01, PNorm = 56.9664, GNorm = 1.9025, lr_0 = 4.7474e-04
Loss = 4.4264e-01, PNorm = 56.9678, GNorm = 1.1376, lr_0 = 4.7442e-04
Loss = 3.7178e-01, PNorm = 56.9758, GNorm = 1.4238, lr_0 = 4.7409e-04
Loss = 4.4273e-01, PNorm = 56.9829, GNorm = 1.3073, lr_0 = 4.7377e-04
Loss = 3.7285e-01, PNorm = 56.9982, GNorm = 1.1628, lr_0 = 4.7344e-04
Loss = 3.6110e-01, PNorm = 56.9997, GNorm = 1.1915, lr_0 = 4.7312e-04
Loss = 3.4785e-01, PNorm = 56.9974, GNorm = 1.4274, lr_0 = 4.7279e-04
Loss = 4.0620e-01, PNorm = 57.0005, GNorm = 1.8006, lr_0 = 4.7247e-04
Loss = 3.4917e-01, PNorm = 57.0109, GNorm = 1.3081, lr_0 = 4.7215e-04
Loss = 3.4663e-01, PNorm = 57.0238, GNorm = 1.7676, lr_0 = 4.7182e-04
Loss = 4.0061e-01, PNorm = 57.0308, GNorm = 1.3669, lr_0 = 4.7150e-04
Loss = 5.0596e-01, PNorm = 57.0406, GNorm = 0.8678, lr_0 = 4.7118e-04
Loss = 3.6567e-01, PNorm = 57.0446, GNorm = 1.2718, lr_0 = 4.7085e-04
Loss = 3.9284e-01, PNorm = 57.0541, GNorm = 1.8932, lr_0 = 4.7053e-04
Loss = 4.3972e-01, PNorm = 57.0696, GNorm = 1.2334, lr_0 = 4.7021e-04
Loss = 3.6848e-01, PNorm = 57.0769, GNorm = 1.0628, lr_0 = 4.6989e-04
Loss = 4.3719e-01, PNorm = 57.0821, GNorm = 2.2323, lr_0 = 4.6957e-04
Loss = 3.9916e-01, PNorm = 57.0897, GNorm = 1.1619, lr_0 = 4.6924e-04
Loss = 4.0096e-01, PNorm = 57.0922, GNorm = 0.9724, lr_0 = 4.6892e-04
Loss = 4.6487e-01, PNorm = 57.0968, GNorm = 1.5796, lr_0 = 4.6860e-04
Loss = 4.0822e-01, PNorm = 57.1070, GNorm = 1.5214, lr_0 = 4.6828e-04
Loss = 3.9646e-01, PNorm = 57.1182, GNorm = 1.8266, lr_0 = 4.6796e-04
Loss = 3.9754e-01, PNorm = 57.1285, GNorm = 1.6770, lr_0 = 4.6764e-04
Loss = 3.5942e-01, PNorm = 57.1324, GNorm = 1.1282, lr_0 = 4.6732e-04
Loss = 4.2258e-01, PNorm = 57.1360, GNorm = 1.1778, lr_0 = 4.6700e-04
Loss = 3.8644e-01, PNorm = 57.1494, GNorm = 1.3520, lr_0 = 4.6668e-04
Loss = 3.9306e-01, PNorm = 57.1569, GNorm = 1.2800, lr_0 = 4.6636e-04
Loss = 3.5280e-01, PNorm = 57.1685, GNorm = 1.5200, lr_0 = 4.6604e-04
Loss = 4.0914e-01, PNorm = 57.1779, GNorm = 1.8915, lr_0 = 4.6572e-04
Loss = 3.5839e-01, PNorm = 57.1870, GNorm = 1.6678, lr_0 = 4.6540e-04
Loss = 4.0679e-01, PNorm = 57.1890, GNorm = 1.1176, lr_0 = 4.6508e-04
Loss = 4.3212e-01, PNorm = 57.1970, GNorm = 2.3664, lr_0 = 4.6476e-04
Loss = 3.9418e-01, PNorm = 57.2091, GNorm = 1.0362, lr_0 = 4.6445e-04
Loss = 4.4270e-01, PNorm = 57.2142, GNorm = 1.6783, lr_0 = 4.6413e-04
Loss = 3.9018e-01, PNorm = 57.2269, GNorm = 1.3572, lr_0 = 4.6381e-04
Loss = 4.1347e-01, PNorm = 57.2406, GNorm = 1.2973, lr_0 = 4.6349e-04
Loss = 3.4656e-01, PNorm = 57.2525, GNorm = 1.0176, lr_0 = 4.6317e-04
Loss = 4.3604e-01, PNorm = 57.2560, GNorm = 1.4665, lr_0 = 4.6286e-04
Loss = 4.2259e-01, PNorm = 57.2547, GNorm = 1.3696, lr_0 = 4.6254e-04
Loss = 3.9855e-01, PNorm = 57.2637, GNorm = 1.5256, lr_0 = 4.6222e-04
Loss = 3.4378e-01, PNorm = 57.2725, GNorm = 0.9617, lr_0 = 4.6191e-04
Loss = 3.7148e-01, PNorm = 57.2794, GNorm = 1.4832, lr_0 = 4.6159e-04
Loss = 4.0530e-01, PNorm = 57.2770, GNorm = 1.9169, lr_0 = 4.6127e-04
Loss = 4.4952e-01, PNorm = 57.2854, GNorm = 1.4737, lr_0 = 4.6096e-04
Loss = 3.8032e-01, PNorm = 57.2956, GNorm = 1.7478, lr_0 = 4.6064e-04
Loss = 4.3855e-01, PNorm = 57.3071, GNorm = 1.1541, lr_0 = 4.6033e-04
Loss = 3.9112e-01, PNorm = 57.3181, GNorm = 1.7313, lr_0 = 4.6001e-04
Loss = 4.3407e-01, PNorm = 57.3248, GNorm = 3.8062, lr_0 = 4.5970e-04
Loss = 3.7446e-01, PNorm = 57.3307, GNorm = 0.9522, lr_0 = 4.5938e-04
Loss = 4.5080e-01, PNorm = 57.3338, GNorm = 1.0731, lr_0 = 4.5907e-04
Loss = 3.9344e-01, PNorm = 57.3411, GNorm = 1.2169, lr_0 = 4.5875e-04
Loss = 4.0373e-01, PNorm = 57.3465, GNorm = 1.2484, lr_0 = 4.5844e-04
Loss = 4.1071e-01, PNorm = 57.3550, GNorm = 1.5456, lr_0 = 4.5812e-04
Loss = 4.2163e-01, PNorm = 57.3616, GNorm = 1.3407, lr_0 = 4.5781e-04
Loss = 4.0205e-01, PNorm = 57.3711, GNorm = 1.3372, lr_0 = 4.5750e-04
Loss = 5.1842e-01, PNorm = 57.3762, GNorm = 1.5630, lr_0 = 4.5718e-04
Loss = 3.6180e-01, PNorm = 57.3834, GNorm = 1.5648, lr_0 = 4.5687e-04
Loss = 3.8176e-01, PNorm = 57.3935, GNorm = 2.1055, lr_0 = 4.5656e-04
Loss = 3.6436e-01, PNorm = 57.4005, GNorm = 1.6471, lr_0 = 4.5624e-04
Loss = 4.3518e-01, PNorm = 57.4185, GNorm = 1.2713, lr_0 = 4.5593e-04
Loss = 4.2119e-01, PNorm = 57.4260, GNorm = 1.1627, lr_0 = 4.5562e-04
Loss = 4.2998e-01, PNorm = 57.4329, GNorm = 1.3536, lr_0 = 4.5531e-04
Loss = 4.1060e-01, PNorm = 57.4455, GNorm = 1.2261, lr_0 = 4.5499e-04
Loss = 4.1193e-01, PNorm = 57.4515, GNorm = 2.5451, lr_0 = 4.5468e-04
Loss = 4.3467e-01, PNorm = 57.4650, GNorm = 1.3070, lr_0 = 4.5437e-04
Loss = 4.2591e-01, PNorm = 57.4756, GNorm = 1.5103, lr_0 = 4.5406e-04
Loss = 4.1377e-01, PNorm = 57.4861, GNorm = 1.5337, lr_0 = 4.5375e-04
Loss = 4.1071e-01, PNorm = 57.4924, GNorm = 1.1599, lr_0 = 4.5344e-04
Loss = 3.5797e-01, PNorm = 57.4998, GNorm = 1.2232, lr_0 = 4.5313e-04
Loss = 4.5000e-01, PNorm = 57.5005, GNorm = 2.5504, lr_0 = 4.5282e-04
Loss = 3.9099e-01, PNorm = 57.5066, GNorm = 1.0357, lr_0 = 4.5251e-04
Loss = 4.5750e-01, PNorm = 57.5135, GNorm = 0.7827, lr_0 = 4.5220e-04
Loss = 3.6929e-01, PNorm = 57.5223, GNorm = 1.1352, lr_0 = 4.5189e-04
Loss = 3.9226e-01, PNorm = 57.5273, GNorm = 1.4701, lr_0 = 4.5158e-04
Loss = 4.5049e-01, PNorm = 57.5327, GNorm = 1.3295, lr_0 = 4.5127e-04
Loss = 3.8466e-01, PNorm = 57.5377, GNorm = 0.8772, lr_0 = 4.5096e-04
Loss = 4.1654e-01, PNorm = 57.5432, GNorm = 1.4295, lr_0 = 4.5065e-04
Loss = 3.9756e-01, PNorm = 57.5475, GNorm = 1.3250, lr_0 = 4.5034e-04
Loss = 4.4250e-01, PNorm = 57.5602, GNorm = 1.6002, lr_0 = 4.5003e-04
Loss = 4.0328e-01, PNorm = 57.5663, GNorm = 1.1891, lr_0 = 4.4972e-04
Loss = 4.0543e-01, PNorm = 57.5744, GNorm = 0.8623, lr_0 = 4.4942e-04
Loss = 3.9096e-01, PNorm = 57.5797, GNorm = 1.5172, lr_0 = 4.4911e-04
Loss = 4.4901e-01, PNorm = 57.5884, GNorm = 1.8040, lr_0 = 4.4880e-04
Loss = 3.5810e-01, PNorm = 57.5977, GNorm = 1.0614, lr_0 = 4.4849e-04
Loss = 3.2564e-01, PNorm = 57.6033, GNorm = 1.1268, lr_0 = 4.4819e-04
Loss = 5.0860e-01, PNorm = 57.6094, GNorm = 1.8982, lr_0 = 4.4788e-04
Loss = 4.2815e-01, PNorm = 57.6167, GNorm = 1.0510, lr_0 = 4.4757e-04
Loss = 4.2491e-01, PNorm = 57.6251, GNorm = 0.9721, lr_0 = 4.4727e-04
Loss = 3.9921e-01, PNorm = 57.6342, GNorm = 1.4886, lr_0 = 4.4696e-04
Loss = 3.7826e-01, PNorm = 57.6392, GNorm = 1.3061, lr_0 = 4.4665e-04
Loss = 4.3900e-01, PNorm = 57.6439, GNorm = 1.3113, lr_0 = 4.4635e-04
Loss = 4.3745e-01, PNorm = 57.6515, GNorm = 3.1189, lr_0 = 4.4604e-04
Loss = 4.1838e-01, PNorm = 57.6533, GNorm = 1.6648, lr_0 = 4.4574e-04
Loss = 3.9082e-01, PNorm = 57.6597, GNorm = 1.3605, lr_0 = 4.4543e-04
Loss = 4.3867e-01, PNorm = 57.6668, GNorm = 1.2556, lr_0 = 4.4513e-04
Loss = 3.9631e-01, PNorm = 57.6715, GNorm = 1.1655, lr_0 = 4.4482e-04
Loss = 4.2889e-01, PNorm = 57.6733, GNorm = 1.1390, lr_0 = 4.4452e-04
Loss = 4.3547e-01, PNorm = 57.6787, GNorm = 1.8067, lr_0 = 4.4421e-04
Loss = 4.1406e-01, PNorm = 57.6866, GNorm = 1.4608, lr_0 = 4.4391e-04
Loss = 4.2114e-01, PNorm = 57.6889, GNorm = 1.5442, lr_0 = 4.4360e-04
Loss = 4.3367e-01, PNorm = 57.6929, GNorm = 1.2848, lr_0 = 4.4330e-04
Loss = 4.4117e-01, PNorm = 57.6977, GNorm = 1.3877, lr_0 = 4.4299e-04
Loss = 3.8927e-01, PNorm = 57.7051, GNorm = 1.4345, lr_0 = 4.4269e-04
Loss = 3.8702e-01, PNorm = 57.7131, GNorm = 1.7669, lr_0 = 4.4239e-04
Loss = 3.7784e-01, PNorm = 57.7162, GNorm = 1.2759, lr_0 = 4.4209e-04
Loss = 3.8791e-01, PNorm = 57.7204, GNorm = 1.0367, lr_0 = 4.4178e-04
Loss = 4.3847e-01, PNorm = 57.7280, GNorm = 1.8012, lr_0 = 4.4148e-04
Loss = 3.6254e-01, PNorm = 57.7360, GNorm = 0.8887, lr_0 = 4.4118e-04
Loss = 4.0937e-01, PNorm = 57.7425, GNorm = 1.8909, lr_0 = 4.4088e-04
Loss = 3.6146e-01, PNorm = 57.7518, GNorm = 1.2588, lr_0 = 4.4057e-04
Loss = 4.1350e-01, PNorm = 57.7643, GNorm = 1.4708, lr_0 = 4.4027e-04
Loss = 3.7339e-01, PNorm = 57.7724, GNorm = 1.9853, lr_0 = 4.3997e-04
Loss = 4.4866e-01, PNorm = 57.7747, GNorm = 1.6945, lr_0 = 4.3967e-04
Loss = 3.7274e-01, PNorm = 57.7856, GNorm = 2.1428, lr_0 = 4.3937e-04
Validation mae = 0.114712
Epoch 12
Loss = 3.8188e-01, PNorm = 57.7947, GNorm = 1.0881, lr_0 = 4.3907e-04
Loss = 3.8176e-01, PNorm = 57.8055, GNorm = 1.4634, lr_0 = 4.3877e-04
Loss = 3.5564e-01, PNorm = 57.8136, GNorm = 2.3123, lr_0 = 4.3846e-04
Loss = 4.1591e-01, PNorm = 57.8263, GNorm = 2.5488, lr_0 = 4.3816e-04
Loss = 3.7643e-01, PNorm = 57.8334, GNorm = 1.3982, lr_0 = 4.3786e-04
Loss = 4.3057e-01, PNorm = 57.8372, GNorm = 1.1576, lr_0 = 4.3756e-04
Loss = 3.7151e-01, PNorm = 57.8423, GNorm = 1.3000, lr_0 = 4.3726e-04
Loss = 4.1458e-01, PNorm = 57.8476, GNorm = 1.3193, lr_0 = 4.3696e-04
Loss = 4.6489e-01, PNorm = 57.8525, GNorm = 1.5058, lr_0 = 4.3667e-04
Loss = 4.3802e-01, PNorm = 57.8613, GNorm = 1.3588, lr_0 = 4.3637e-04
Loss = 4.5898e-01, PNorm = 57.8700, GNorm = 1.8190, lr_0 = 4.3607e-04
Loss = 4.3308e-01, PNorm = 57.8781, GNorm = 1.5662, lr_0 = 4.3577e-04
Loss = 3.7544e-01, PNorm = 57.8824, GNorm = 1.5180, lr_0 = 4.3547e-04
Loss = 3.8743e-01, PNorm = 57.8893, GNorm = 1.3829, lr_0 = 4.3517e-04
Loss = 3.7527e-01, PNorm = 57.8974, GNorm = 1.7861, lr_0 = 4.3487e-04
Loss = 4.3236e-01, PNorm = 57.9038, GNorm = 1.3962, lr_0 = 4.3458e-04
Loss = 4.1239e-01, PNorm = 57.9049, GNorm = 1.4571, lr_0 = 4.3428e-04
Loss = 4.1474e-01, PNorm = 57.9157, GNorm = 0.9979, lr_0 = 4.3398e-04
Loss = 4.0176e-01, PNorm = 57.9164, GNorm = 1.0998, lr_0 = 4.3368e-04
Loss = 3.9533e-01, PNorm = 57.9285, GNorm = 0.9758, lr_0 = 4.3339e-04
Loss = 3.9888e-01, PNorm = 57.9414, GNorm = 2.0432, lr_0 = 4.3309e-04
Loss = 4.1163e-01, PNorm = 57.9480, GNorm = 1.7259, lr_0 = 4.3279e-04
Loss = 3.8768e-01, PNorm = 57.9557, GNorm = 1.0722, lr_0 = 4.3250e-04
Loss = 3.8324e-01, PNorm = 57.9618, GNorm = 1.4710, lr_0 = 4.3220e-04
Loss = 3.7175e-01, PNorm = 57.9665, GNorm = 1.4789, lr_0 = 4.3190e-04
Loss = 3.7880e-01, PNorm = 57.9746, GNorm = 1.5070, lr_0 = 4.3161e-04
Loss = 4.3184e-01, PNorm = 57.9772, GNorm = 1.4159, lr_0 = 4.3131e-04
Loss = 3.9083e-01, PNorm = 57.9853, GNorm = 1.2505, lr_0 = 4.3102e-04
Loss = 3.8763e-01, PNorm = 57.9887, GNorm = 0.9723, lr_0 = 4.3072e-04
Loss = 3.8377e-01, PNorm = 57.9998, GNorm = 1.7828, lr_0 = 4.3043e-04
Loss = 4.5636e-01, PNorm = 58.0068, GNorm = 1.1763, lr_0 = 4.3013e-04
Loss = 3.9088e-01, PNorm = 58.0184, GNorm = 2.7437, lr_0 = 4.2984e-04
Loss = 4.1148e-01, PNorm = 58.0215, GNorm = 1.8752, lr_0 = 4.2954e-04
Loss = 3.8949e-01, PNorm = 58.0287, GNorm = 1.6564, lr_0 = 4.2925e-04
Loss = 4.2367e-01, PNorm = 58.0347, GNorm = 1.4444, lr_0 = 4.2895e-04
Loss = 4.0059e-01, PNorm = 58.0414, GNorm = 1.3188, lr_0 = 4.2866e-04
Loss = 4.0925e-01, PNorm = 58.0468, GNorm = 1.7748, lr_0 = 4.2837e-04
Loss = 4.1485e-01, PNorm = 58.0475, GNorm = 1.5589, lr_0 = 4.2807e-04
Loss = 3.6208e-01, PNorm = 58.0520, GNorm = 1.4580, lr_0 = 4.2778e-04
Loss = 3.8498e-01, PNorm = 58.0606, GNorm = 1.4534, lr_0 = 4.2749e-04
Loss = 4.2732e-01, PNorm = 58.0703, GNorm = 1.1523, lr_0 = 4.2719e-04
Loss = 4.0627e-01, PNorm = 58.0827, GNorm = 1.4055, lr_0 = 4.2690e-04
Loss = 3.2673e-01, PNorm = 58.0925, GNorm = 1.8516, lr_0 = 4.2661e-04
Loss = 4.1612e-01, PNorm = 58.0991, GNorm = 1.4600, lr_0 = 4.2632e-04
Loss = 4.0176e-01, PNorm = 58.1070, GNorm = 1.6430, lr_0 = 4.2602e-04
Loss = 3.7340e-01, PNorm = 58.1154, GNorm = 1.1001, lr_0 = 4.2573e-04
Loss = 4.0155e-01, PNorm = 58.1212, GNorm = 1.5006, lr_0 = 4.2544e-04
Loss = 3.5062e-01, PNorm = 58.1283, GNorm = 1.3710, lr_0 = 4.2515e-04
Loss = 4.3433e-01, PNorm = 58.1289, GNorm = 1.2613, lr_0 = 4.2486e-04
Loss = 4.1050e-01, PNorm = 58.1372, GNorm = 1.0152, lr_0 = 4.2457e-04
Loss = 4.2490e-01, PNorm = 58.1443, GNorm = 1.4231, lr_0 = 4.2428e-04
Loss = 3.8884e-01, PNorm = 58.1560, GNorm = 1.3433, lr_0 = 4.2399e-04
Loss = 4.5141e-01, PNorm = 58.1593, GNorm = 1.5734, lr_0 = 4.2370e-04
Loss = 3.8296e-01, PNorm = 58.1751, GNorm = 1.3439, lr_0 = 4.2340e-04
Loss = 3.5219e-01, PNorm = 58.1847, GNorm = 1.0607, lr_0 = 4.2311e-04
Loss = 4.2765e-01, PNorm = 58.1929, GNorm = 1.4042, lr_0 = 4.2283e-04
Loss = 3.9491e-01, PNorm = 58.2027, GNorm = 1.2041, lr_0 = 4.2254e-04
Loss = 4.0954e-01, PNorm = 58.2125, GNorm = 2.3849, lr_0 = 4.2225e-04
Loss = 4.2339e-01, PNorm = 58.2179, GNorm = 1.5433, lr_0 = 4.2196e-04
Loss = 4.9033e-01, PNorm = 58.2246, GNorm = 1.3996, lr_0 = 4.2167e-04
Loss = 3.3485e-01, PNorm = 58.2314, GNorm = 1.1826, lr_0 = 4.2138e-04
Loss = 4.2058e-01, PNorm = 58.2356, GNorm = 1.2955, lr_0 = 4.2109e-04
Loss = 3.8104e-01, PNorm = 58.2457, GNorm = 1.3024, lr_0 = 4.2080e-04
Loss = 3.9047e-01, PNorm = 58.2530, GNorm = 1.6878, lr_0 = 4.2051e-04
Loss = 3.8432e-01, PNorm = 58.2638, GNorm = 0.9386, lr_0 = 4.2023e-04
Loss = 3.9798e-01, PNorm = 58.2669, GNorm = 1.0296, lr_0 = 4.1994e-04
Loss = 3.8436e-01, PNorm = 58.2770, GNorm = 0.9547, lr_0 = 4.1965e-04
Loss = 3.7954e-01, PNorm = 58.2838, GNorm = 1.0191, lr_0 = 4.1936e-04
Loss = 4.1962e-01, PNorm = 58.2965, GNorm = 2.3255, lr_0 = 4.1907e-04
Loss = 4.0962e-01, PNorm = 58.3019, GNorm = 1.6044, lr_0 = 4.1879e-04
Loss = 4.0306e-01, PNorm = 58.3074, GNorm = 0.8963, lr_0 = 4.1850e-04
Loss = 4.4448e-01, PNorm = 58.3059, GNorm = 1.6079, lr_0 = 4.1821e-04
Loss = 4.2814e-01, PNorm = 58.3105, GNorm = 1.6138, lr_0 = 4.1793e-04
Loss = 4.2791e-01, PNorm = 58.3219, GNorm = 1.4786, lr_0 = 4.1764e-04
Loss = 4.3223e-01, PNorm = 58.3276, GNorm = 1.8182, lr_0 = 4.1736e-04
Loss = 4.0008e-01, PNorm = 58.3300, GNorm = 1.4199, lr_0 = 4.1707e-04
Loss = 3.8239e-01, PNorm = 58.3344, GNorm = 1.2417, lr_0 = 4.1678e-04
Loss = 4.0842e-01, PNorm = 58.3426, GNorm = 1.6002, lr_0 = 4.1650e-04
Loss = 3.8334e-01, PNorm = 58.3471, GNorm = 1.6720, lr_0 = 4.1621e-04
Loss = 3.6032e-01, PNorm = 58.3525, GNorm = 1.8073, lr_0 = 4.1593e-04
Loss = 5.6402e-01, PNorm = 58.3524, GNorm = 1.9146, lr_0 = 4.1564e-04
Loss = 4.3358e-01, PNorm = 58.3570, GNorm = 1.2533, lr_0 = 4.1536e-04
Loss = 3.6534e-01, PNorm = 58.3645, GNorm = 1.4832, lr_0 = 4.1507e-04
Loss = 4.5082e-01, PNorm = 58.3703, GNorm = 1.6794, lr_0 = 4.1479e-04
Loss = 3.7137e-01, PNorm = 58.3731, GNorm = 1.9625, lr_0 = 4.1450e-04
Loss = 4.0001e-01, PNorm = 58.3803, GNorm = 1.7477, lr_0 = 4.1422e-04
Loss = 5.3259e-01, PNorm = 58.3891, GNorm = 2.6652, lr_0 = 4.1394e-04
Loss = 3.9575e-01, PNorm = 58.4008, GNorm = 1.7518, lr_0 = 4.1365e-04
Loss = 3.6130e-01, PNorm = 58.4054, GNorm = 2.1691, lr_0 = 4.1337e-04
Loss = 3.9038e-01, PNorm = 58.4123, GNorm = 1.0891, lr_0 = 4.1309e-04
Loss = 3.7876e-01, PNorm = 58.4174, GNorm = 1.5006, lr_0 = 4.1280e-04
Loss = 3.9081e-01, PNorm = 58.4240, GNorm = 1.3215, lr_0 = 4.1252e-04
Loss = 4.0909e-01, PNorm = 58.4266, GNorm = 1.4961, lr_0 = 4.1224e-04
Loss = 4.0426e-01, PNorm = 58.4341, GNorm = 2.1732, lr_0 = 4.1196e-04
Loss = 3.9442e-01, PNorm = 58.4380, GNorm = 1.8298, lr_0 = 4.1167e-04
Loss = 4.3279e-01, PNorm = 58.4442, GNorm = 1.6866, lr_0 = 4.1139e-04
Loss = 3.9812e-01, PNorm = 58.4507, GNorm = 1.7955, lr_0 = 4.1111e-04
Loss = 3.8965e-01, PNorm = 58.4561, GNorm = 1.6475, lr_0 = 4.1083e-04
Loss = 3.9010e-01, PNorm = 58.4592, GNorm = 1.5899, lr_0 = 4.1055e-04
Loss = 3.6667e-01, PNorm = 58.4649, GNorm = 0.9975, lr_0 = 4.1027e-04
Loss = 4.2232e-01, PNorm = 58.4711, GNorm = 1.2436, lr_0 = 4.0998e-04
Loss = 4.0898e-01, PNorm = 58.4758, GNorm = 1.8972, lr_0 = 4.0970e-04
Loss = 3.5311e-01, PNorm = 58.4788, GNorm = 0.9771, lr_0 = 4.0942e-04
Loss = 3.5976e-01, PNorm = 58.4827, GNorm = 1.2467, lr_0 = 4.0914e-04
Loss = 3.5155e-01, PNorm = 58.4854, GNorm = 1.8740, lr_0 = 4.0886e-04
Loss = 3.8647e-01, PNorm = 58.4869, GNorm = 1.2774, lr_0 = 4.0858e-04
Loss = 4.1422e-01, PNorm = 58.4920, GNorm = 1.1775, lr_0 = 4.0830e-04
Loss = 3.7508e-01, PNorm = 58.4983, GNorm = 1.3580, lr_0 = 4.0802e-04
Loss = 4.3295e-01, PNorm = 58.5084, GNorm = 1.8285, lr_0 = 4.0774e-04
Loss = 4.2006e-01, PNorm = 58.5144, GNorm = 1.9251, lr_0 = 4.0746e-04
Loss = 3.8665e-01, PNorm = 58.5192, GNorm = 1.3418, lr_0 = 4.0718e-04
Loss = 3.9811e-01, PNorm = 58.5250, GNorm = 1.1635, lr_0 = 4.0691e-04
Loss = 3.8285e-01, PNorm = 58.5362, GNorm = 1.6806, lr_0 = 4.0663e-04
Loss = 3.9903e-01, PNorm = 58.5392, GNorm = 1.4654, lr_0 = 4.0635e-04
Loss = 3.9022e-01, PNorm = 58.5423, GNorm = 1.7379, lr_0 = 4.0607e-04
Loss = 4.2818e-01, PNorm = 58.5454, GNorm = 1.3583, lr_0 = 4.0579e-04
Loss = 4.0389e-01, PNorm = 58.5523, GNorm = 1.5364, lr_0 = 4.0551e-04
Loss = 3.8660e-01, PNorm = 58.5612, GNorm = 1.5935, lr_0 = 4.0524e-04
Loss = 3.8972e-01, PNorm = 58.5639, GNorm = 1.1098, lr_0 = 4.0496e-04
Loss = 4.6165e-01, PNorm = 58.5730, GNorm = 1.5891, lr_0 = 4.0468e-04
Validation mae = 0.115186
Epoch 13
Loss = 4.7082e-01, PNorm = 58.5777, GNorm = 1.8554, lr_0 = 4.0440e-04
Loss = 4.3259e-01, PNorm = 58.5877, GNorm = 1.0016, lr_0 = 4.0413e-04
Loss = 3.8864e-01, PNorm = 58.5928, GNorm = 1.1276, lr_0 = 4.0385e-04
Loss = 4.4028e-01, PNorm = 58.5973, GNorm = 1.5541, lr_0 = 4.0357e-04
Loss = 3.7420e-01, PNorm = 58.6061, GNorm = 1.5031, lr_0 = 4.0330e-04
Loss = 3.4604e-01, PNorm = 58.6096, GNorm = 1.0629, lr_0 = 4.0302e-04
Loss = 3.6682e-01, PNorm = 58.6172, GNorm = 1.1583, lr_0 = 4.0274e-04
Loss = 4.3086e-01, PNorm = 58.6262, GNorm = 1.1794, lr_0 = 4.0247e-04
Loss = 3.6990e-01, PNorm = 58.6339, GNorm = 1.6460, lr_0 = 4.0219e-04
Loss = 3.3978e-01, PNorm = 58.6386, GNorm = 1.4004, lr_0 = 4.0192e-04
Loss = 4.0686e-01, PNorm = 58.6429, GNorm = 0.8005, lr_0 = 4.0164e-04
Loss = 3.9589e-01, PNorm = 58.6438, GNorm = 1.5679, lr_0 = 4.0137e-04
Loss = 3.6062e-01, PNorm = 58.6513, GNorm = 1.3688, lr_0 = 4.0109e-04
Loss = 3.9814e-01, PNorm = 58.6543, GNorm = 1.3885, lr_0 = 4.0082e-04
Loss = 4.4518e-01, PNorm = 58.6618, GNorm = 1.3319, lr_0 = 4.0054e-04
Loss = 4.2062e-01, PNorm = 58.6679, GNorm = 1.9522, lr_0 = 4.0027e-04
Loss = 4.4798e-01, PNorm = 58.6770, GNorm = 1.8361, lr_0 = 3.9999e-04
Loss = 3.8918e-01, PNorm = 58.6816, GNorm = 1.0959, lr_0 = 3.9972e-04
Loss = 3.5912e-01, PNorm = 58.6854, GNorm = 1.7878, lr_0 = 3.9945e-04
Loss = 4.2931e-01, PNorm = 58.6918, GNorm = 0.8650, lr_0 = 3.9917e-04
Loss = 3.9331e-01, PNorm = 58.6962, GNorm = 1.3487, lr_0 = 3.9890e-04
Loss = 3.6353e-01, PNorm = 58.7088, GNorm = 1.0605, lr_0 = 3.9863e-04
Loss = 3.8032e-01, PNorm = 58.7156, GNorm = 1.4003, lr_0 = 3.9835e-04
Loss = 3.8966e-01, PNorm = 58.7241, GNorm = 1.2107, lr_0 = 3.9808e-04
Loss = 3.3628e-01, PNorm = 58.7303, GNorm = 1.4885, lr_0 = 3.9781e-04
Loss = 3.5423e-01, PNorm = 58.7301, GNorm = 1.3621, lr_0 = 3.9753e-04
Loss = 3.6154e-01, PNorm = 58.7353, GNorm = 0.9792, lr_0 = 3.9726e-04
Loss = 3.9698e-01, PNorm = 58.7423, GNorm = 0.9831, lr_0 = 3.9699e-04
Loss = 3.6654e-01, PNorm = 58.7477, GNorm = 1.2257, lr_0 = 3.9672e-04
Loss = 3.5615e-01, PNorm = 58.7496, GNorm = 1.7827, lr_0 = 3.9645e-04
Loss = 3.7959e-01, PNorm = 58.7551, GNorm = 1.5062, lr_0 = 3.9617e-04
Loss = 3.9235e-01, PNorm = 58.7626, GNorm = 1.3098, lr_0 = 3.9590e-04
Loss = 3.9023e-01, PNorm = 58.7681, GNorm = 1.5541, lr_0 = 3.9563e-04
Loss = 3.3256e-01, PNorm = 58.7717, GNorm = 1.3633, lr_0 = 3.9536e-04
Loss = 4.9249e-01, PNorm = 58.7746, GNorm = 1.7703, lr_0 = 3.9509e-04
Loss = 3.6954e-01, PNorm = 58.7769, GNorm = 1.3257, lr_0 = 3.9482e-04
Loss = 3.9917e-01, PNorm = 58.7841, GNorm = 1.0427, lr_0 = 3.9455e-04
Loss = 4.3688e-01, PNorm = 58.7923, GNorm = 1.5438, lr_0 = 3.9428e-04
Loss = 4.1880e-01, PNorm = 58.7980, GNorm = 1.7204, lr_0 = 3.9401e-04
Loss = 4.0643e-01, PNorm = 58.8035, GNorm = 1.1165, lr_0 = 3.9374e-04
Loss = 3.8812e-01, PNorm = 58.8129, GNorm = 1.0524, lr_0 = 3.9347e-04
Loss = 3.7321e-01, PNorm = 58.8145, GNorm = 1.7734, lr_0 = 3.9320e-04
Loss = 3.4353e-01, PNorm = 58.8189, GNorm = 1.0346, lr_0 = 3.9293e-04
Loss = 3.8311e-01, PNorm = 58.8257, GNorm = 1.3449, lr_0 = 3.9266e-04
Loss = 4.2698e-01, PNorm = 58.8322, GNorm = 1.3168, lr_0 = 3.9239e-04
Loss = 4.4624e-01, PNorm = 58.8406, GNorm = 1.3730, lr_0 = 3.9212e-04
Loss = 4.2789e-01, PNorm = 58.8507, GNorm = 1.4185, lr_0 = 3.9185e-04
Loss = 3.7395e-01, PNorm = 58.8549, GNorm = 1.4023, lr_0 = 3.9159e-04
Loss = 3.8173e-01, PNorm = 58.8572, GNorm = 1.1615, lr_0 = 3.9132e-04
Loss = 4.3198e-01, PNorm = 58.8662, GNorm = 1.7391, lr_0 = 3.9105e-04
Loss = 4.2170e-01, PNorm = 58.8730, GNorm = 0.9144, lr_0 = 3.9078e-04
Loss = 4.3163e-01, PNorm = 58.8811, GNorm = 1.1666, lr_0 = 3.9051e-04
Loss = 4.2596e-01, PNorm = 58.8816, GNorm = 1.4213, lr_0 = 3.9025e-04
Loss = 3.7851e-01, PNorm = 58.8864, GNorm = 1.2635, lr_0 = 3.8998e-04
Loss = 4.0535e-01, PNorm = 58.8940, GNorm = 1.2965, lr_0 = 3.8971e-04
Loss = 4.2237e-01, PNorm = 58.9064, GNorm = 1.0861, lr_0 = 3.8945e-04
Loss = 3.6831e-01, PNorm = 58.9129, GNorm = 1.5854, lr_0 = 3.8918e-04
Loss = 4.0117e-01, PNorm = 58.9203, GNorm = 1.2198, lr_0 = 3.8891e-04
Loss = 3.2964e-01, PNorm = 58.9226, GNorm = 1.2623, lr_0 = 3.8865e-04
Loss = 3.4112e-01, PNorm = 58.9273, GNorm = 1.4847, lr_0 = 3.8838e-04
Loss = 4.4570e-01, PNorm = 58.9336, GNorm = 1.2334, lr_0 = 3.8811e-04
Loss = 3.3427e-01, PNorm = 58.9400, GNorm = 1.4682, lr_0 = 3.8785e-04
Loss = 4.0083e-01, PNorm = 58.9447, GNorm = 1.3923, lr_0 = 3.8758e-04
Loss = 4.5380e-01, PNorm = 58.9457, GNorm = 1.7570, lr_0 = 3.8732e-04
Loss = 3.4905e-01, PNorm = 58.9515, GNorm = 1.2320, lr_0 = 3.8705e-04
Loss = 3.7691e-01, PNorm = 58.9601, GNorm = 1.3540, lr_0 = 3.8679e-04
Loss = 3.9416e-01, PNorm = 58.9635, GNorm = 1.7647, lr_0 = 3.8652e-04
Loss = 4.2530e-01, PNorm = 58.9722, GNorm = 1.1322, lr_0 = 3.8626e-04
Loss = 3.8055e-01, PNorm = 58.9789, GNorm = 1.8023, lr_0 = 3.8599e-04
Loss = 3.6029e-01, PNorm = 58.9882, GNorm = 1.0303, lr_0 = 3.8573e-04
Loss = 4.2392e-01, PNorm = 58.9899, GNorm = 1.7033, lr_0 = 3.8546e-04
Loss = 3.9866e-01, PNorm = 59.0020, GNorm = 1.3239, lr_0 = 3.8520e-04
Loss = 4.1303e-01, PNorm = 59.0084, GNorm = 1.1618, lr_0 = 3.8493e-04
Loss = 4.1947e-01, PNorm = 59.0189, GNorm = 1.1178, lr_0 = 3.8467e-04
Loss = 3.8109e-01, PNorm = 59.0245, GNorm = 1.2148, lr_0 = 3.8441e-04
Loss = 4.3359e-01, PNorm = 59.0295, GNorm = 1.4897, lr_0 = 3.8414e-04
Loss = 3.4654e-01, PNorm = 59.0349, GNorm = 1.2672, lr_0 = 3.8388e-04
Loss = 4.2810e-01, PNorm = 59.0400, GNorm = 1.0040, lr_0 = 3.8362e-04
Loss = 4.0691e-01, PNorm = 59.0477, GNorm = 1.6812, lr_0 = 3.8336e-04
Loss = 3.3370e-01, PNorm = 59.0525, GNorm = 1.0391, lr_0 = 3.8309e-04
Loss = 3.7380e-01, PNorm = 59.0562, GNorm = 1.2343, lr_0 = 3.8283e-04
Loss = 3.8167e-01, PNorm = 59.0613, GNorm = 1.1335, lr_0 = 3.8257e-04
Loss = 3.7093e-01, PNorm = 59.0648, GNorm = 1.1800, lr_0 = 3.8231e-04
Loss = 3.8234e-01, PNorm = 59.0650, GNorm = 1.3214, lr_0 = 3.8204e-04
Loss = 3.2760e-01, PNorm = 59.0619, GNorm = 1.0894, lr_0 = 3.8178e-04
Loss = 4.0370e-01, PNorm = 59.0693, GNorm = 1.3196, lr_0 = 3.8152e-04
Loss = 4.3310e-01, PNorm = 59.0770, GNorm = 1.3472, lr_0 = 3.8126e-04
Loss = 3.5303e-01, PNorm = 59.0848, GNorm = 1.1317, lr_0 = 3.8100e-04
Loss = 4.2458e-01, PNorm = 59.0912, GNorm = 1.1111, lr_0 = 3.8074e-04
Loss = 3.4833e-01, PNorm = 59.0986, GNorm = 1.4456, lr_0 = 3.8048e-04
Loss = 4.4960e-01, PNorm = 59.0995, GNorm = 1.7664, lr_0 = 3.8022e-04
Loss = 3.6012e-01, PNorm = 59.1091, GNorm = 1.4113, lr_0 = 3.7995e-04
Loss = 3.7893e-01, PNorm = 59.1148, GNorm = 0.8044, lr_0 = 3.7969e-04
Loss = 3.6465e-01, PNorm = 59.1201, GNorm = 1.4154, lr_0 = 3.7943e-04
Loss = 3.7773e-01, PNorm = 59.1247, GNorm = 1.5482, lr_0 = 3.7917e-04
Loss = 3.9560e-01, PNorm = 59.1336, GNorm = 2.6549, lr_0 = 3.7891e-04
Loss = 4.0635e-01, PNorm = 59.1362, GNorm = 2.2641, lr_0 = 3.7866e-04
Loss = 4.2991e-01, PNorm = 59.1454, GNorm = 2.1786, lr_0 = 3.7840e-04
Loss = 3.7924e-01, PNorm = 59.1497, GNorm = 1.8316, lr_0 = 3.7814e-04
Loss = 4.4672e-01, PNorm = 59.1588, GNorm = 1.4175, lr_0 = 3.7788e-04
Loss = 4.3272e-01, PNorm = 59.1612, GNorm = 1.7155, lr_0 = 3.7762e-04
Loss = 4.2146e-01, PNorm = 59.1612, GNorm = 1.6568, lr_0 = 3.7736e-04
Loss = 3.9496e-01, PNorm = 59.1648, GNorm = 1.8466, lr_0 = 3.7710e-04
Loss = 3.5422e-01, PNorm = 59.1686, GNorm = 1.2455, lr_0 = 3.7684e-04
Loss = 4.4802e-01, PNorm = 59.1695, GNorm = 1.7495, lr_0 = 3.7659e-04
Loss = 3.9674e-01, PNorm = 59.1755, GNorm = 1.4478, lr_0 = 3.7633e-04
Loss = 4.0081e-01, PNorm = 59.1835, GNorm = 1.1719, lr_0 = 3.7607e-04
Loss = 4.1752e-01, PNorm = 59.1839, GNorm = 1.7975, lr_0 = 3.7581e-04
Loss = 4.0454e-01, PNorm = 59.1846, GNorm = 1.0679, lr_0 = 3.7555e-04
Loss = 4.1683e-01, PNorm = 59.1830, GNorm = 1.3239, lr_0 = 3.7530e-04
Loss = 4.0126e-01, PNorm = 59.1926, GNorm = 1.2004, lr_0 = 3.7504e-04
Loss = 4.2190e-01, PNorm = 59.2001, GNorm = 1.6937, lr_0 = 3.7478e-04
Loss = 5.1795e-01, PNorm = 59.2100, GNorm = 1.2197, lr_0 = 3.7453e-04
Loss = 4.2614e-01, PNorm = 59.2175, GNorm = 1.4748, lr_0 = 3.7427e-04
Loss = 4.5700e-01, PNorm = 59.2220, GNorm = 1.4175, lr_0 = 3.7401e-04
Loss = 3.6900e-01, PNorm = 59.2262, GNorm = 2.1858, lr_0 = 3.7376e-04
Loss = 3.4032e-01, PNorm = 59.2329, GNorm = 1.2735, lr_0 = 3.7350e-04
Loss = 4.1307e-01, PNorm = 59.2379, GNorm = 1.6773, lr_0 = 3.7325e-04
Loss = 4.0346e-01, PNorm = 59.2427, GNorm = 1.5265, lr_0 = 3.7299e-04
Loss = 3.3085e-01, PNorm = 59.2492, GNorm = 1.4964, lr_0 = 3.7273e-04
Validation mae = 0.114306
Epoch 14
Loss = 3.8588e-01, PNorm = 59.2463, GNorm = 1.6286, lr_0 = 3.7248e-04
Loss = 4.0652e-01, PNorm = 59.2507, GNorm = 1.5592, lr_0 = 3.7222e-04
Loss = 3.5573e-01, PNorm = 59.2532, GNorm = 1.3582, lr_0 = 3.7197e-04
Loss = 3.5378e-01, PNorm = 59.2625, GNorm = 1.0312, lr_0 = 3.7171e-04
Loss = 3.7532e-01, PNorm = 59.2710, GNorm = 1.8601, lr_0 = 3.7146e-04
Loss = 4.1431e-01, PNorm = 59.2760, GNorm = 2.0836, lr_0 = 3.7120e-04
Loss = 3.3662e-01, PNorm = 59.2807, GNorm = 1.1988, lr_0 = 3.7095e-04
Loss = 3.6597e-01, PNorm = 59.2884, GNorm = 1.2998, lr_0 = 3.7070e-04
Loss = 4.0086e-01, PNorm = 59.2973, GNorm = 2.2311, lr_0 = 3.7044e-04
Loss = 4.2767e-01, PNorm = 59.3047, GNorm = 1.5650, lr_0 = 3.7019e-04
Loss = 3.8856e-01, PNorm = 59.3130, GNorm = 1.4900, lr_0 = 3.6993e-04
Loss = 3.6297e-01, PNorm = 59.3186, GNorm = 1.0904, lr_0 = 3.6968e-04
Loss = 3.4970e-01, PNorm = 59.3279, GNorm = 1.1814, lr_0 = 3.6943e-04
Loss = 4.2120e-01, PNorm = 59.3327, GNorm = 1.9948, lr_0 = 3.6917e-04
Loss = 3.4744e-01, PNorm = 59.3387, GNorm = 1.3329, lr_0 = 3.6892e-04
Loss = 4.2394e-01, PNorm = 59.3401, GNorm = 1.3727, lr_0 = 3.6867e-04
Loss = 4.6404e-01, PNorm = 59.3450, GNorm = 1.3692, lr_0 = 3.6842e-04
Loss = 4.1133e-01, PNorm = 59.3530, GNorm = 1.4262, lr_0 = 3.6816e-04
Loss = 3.5087e-01, PNorm = 59.3626, GNorm = 1.3548, lr_0 = 3.6791e-04
Loss = 3.5890e-01, PNorm = 59.3711, GNorm = 1.4316, lr_0 = 3.6766e-04
Loss = 3.7797e-01, PNorm = 59.3747, GNorm = 1.0993, lr_0 = 3.6741e-04
Loss = 3.5091e-01, PNorm = 59.3791, GNorm = 1.2258, lr_0 = 3.6716e-04
Loss = 3.8558e-01, PNorm = 59.3838, GNorm = 1.7395, lr_0 = 3.6690e-04
Loss = 4.1108e-01, PNorm = 59.3932, GNorm = 1.6925, lr_0 = 3.6665e-04
Loss = 4.8270e-01, PNorm = 59.3929, GNorm = 1.8319, lr_0 = 3.6640e-04
Loss = 3.6674e-01, PNorm = 59.3990, GNorm = 1.2645, lr_0 = 3.6615e-04
Loss = 3.6417e-01, PNorm = 59.4018, GNorm = 1.1499, lr_0 = 3.6590e-04
Loss = 4.4432e-01, PNorm = 59.4081, GNorm = 1.3672, lr_0 = 3.6565e-04
Loss = 3.8315e-01, PNorm = 59.4147, GNorm = 1.5147, lr_0 = 3.6540e-04
Loss = 4.7583e-01, PNorm = 59.4224, GNorm = 2.3445, lr_0 = 3.6515e-04
Loss = 3.8413e-01, PNorm = 59.4308, GNorm = 1.3810, lr_0 = 3.6490e-04
Loss = 3.5379e-01, PNorm = 59.4356, GNorm = 1.1537, lr_0 = 3.6465e-04
Loss = 3.9560e-01, PNorm = 59.4409, GNorm = 1.0297, lr_0 = 3.6440e-04
Loss = 4.2120e-01, PNorm = 59.4469, GNorm = 1.7068, lr_0 = 3.6415e-04
Loss = 4.3235e-01, PNorm = 59.4539, GNorm = 1.1955, lr_0 = 3.6390e-04
Loss = 3.5935e-01, PNorm = 59.4624, GNorm = 1.5792, lr_0 = 3.6365e-04
Loss = 3.4167e-01, PNorm = 59.4713, GNorm = 1.8126, lr_0 = 3.6340e-04
Loss = 3.4815e-01, PNorm = 59.4788, GNorm = 1.8476, lr_0 = 3.6315e-04
Loss = 4.0665e-01, PNorm = 59.4889, GNorm = 1.1987, lr_0 = 3.6290e-04
Loss = 3.8985e-01, PNorm = 59.4927, GNorm = 1.6531, lr_0 = 3.6266e-04
Loss = 4.0227e-01, PNorm = 59.4970, GNorm = 0.8817, lr_0 = 3.6241e-04
Loss = 4.0020e-01, PNorm = 59.5015, GNorm = 1.0451, lr_0 = 3.6216e-04
Loss = 4.0539e-01, PNorm = 59.5066, GNorm = 1.6019, lr_0 = 3.6191e-04
Loss = 4.1475e-01, PNorm = 59.5103, GNorm = 1.6080, lr_0 = 3.6166e-04
Loss = 4.1875e-01, PNorm = 59.5152, GNorm = 2.0635, lr_0 = 3.6141e-04
Loss = 4.2533e-01, PNorm = 59.5222, GNorm = 1.7444, lr_0 = 3.6117e-04
Loss = 3.7005e-01, PNorm = 59.5238, GNorm = 1.2608, lr_0 = 3.6092e-04
Loss = 4.1131e-01, PNorm = 59.5258, GNorm = 1.8447, lr_0 = 3.6067e-04
Loss = 3.9729e-01, PNorm = 59.5282, GNorm = 1.3095, lr_0 = 3.6043e-04
Loss = 3.8122e-01, PNorm = 59.5357, GNorm = 1.4255, lr_0 = 3.6018e-04
Loss = 3.8552e-01, PNorm = 59.5416, GNorm = 1.2846, lr_0 = 3.5993e-04
Loss = 3.7973e-01, PNorm = 59.5445, GNorm = 1.2322, lr_0 = 3.5969e-04
Loss = 3.9414e-01, PNorm = 59.5482, GNorm = 1.7657, lr_0 = 3.5944e-04
Loss = 3.5130e-01, PNorm = 59.5518, GNorm = 1.2758, lr_0 = 3.5919e-04
Loss = 4.6478e-01, PNorm = 59.5585, GNorm = 1.5161, lr_0 = 3.5895e-04
Loss = 4.1559e-01, PNorm = 59.5677, GNorm = 1.1724, lr_0 = 3.5870e-04
Loss = 3.9506e-01, PNorm = 59.5699, GNorm = 2.1459, lr_0 = 3.5845e-04
Loss = 3.6234e-01, PNorm = 59.5756, GNorm = 1.0203, lr_0 = 3.5821e-04
Loss = 4.2718e-01, PNorm = 59.5767, GNorm = 1.7197, lr_0 = 3.5796e-04
Loss = 3.8564e-01, PNorm = 59.5815, GNorm = 1.6413, lr_0 = 3.5772e-04
Loss = 3.6962e-01, PNorm = 59.5897, GNorm = 1.7918, lr_0 = 3.5747e-04
Loss = 3.9077e-01, PNorm = 59.5953, GNorm = 1.3213, lr_0 = 3.5723e-04
Loss = 4.3473e-01, PNorm = 59.6043, GNorm = 1.2785, lr_0 = 3.5698e-04
Loss = 3.7114e-01, PNorm = 59.6084, GNorm = 1.4736, lr_0 = 3.5674e-04
Loss = 3.4976e-01, PNorm = 59.6123, GNorm = 0.9451, lr_0 = 3.5650e-04
Loss = 3.9339e-01, PNorm = 59.6154, GNorm = 1.7574, lr_0 = 3.5625e-04
Loss = 3.7161e-01, PNorm = 59.6211, GNorm = 1.5965, lr_0 = 3.5601e-04
Loss = 3.7795e-01, PNorm = 59.6266, GNorm = 1.6797, lr_0 = 3.5576e-04
Loss = 4.7859e-01, PNorm = 59.6262, GNorm = 1.3065, lr_0 = 3.5552e-04
Loss = 3.8990e-01, PNorm = 59.6326, GNorm = 1.2290, lr_0 = 3.5528e-04
Loss = 3.9581e-01, PNorm = 59.6389, GNorm = 1.3164, lr_0 = 3.5503e-04
Loss = 4.6389e-01, PNorm = 59.6442, GNorm = 2.3994, lr_0 = 3.5479e-04
Loss = 3.9905e-01, PNorm = 59.6521, GNorm = 1.5785, lr_0 = 3.5455e-04
Loss = 3.3270e-01, PNorm = 59.6544, GNorm = 1.0909, lr_0 = 3.5430e-04
Loss = 3.4945e-01, PNorm = 59.6577, GNorm = 1.5988, lr_0 = 3.5406e-04
Loss = 4.1567e-01, PNorm = 59.6651, GNorm = 1.3963, lr_0 = 3.5382e-04
Loss = 3.6933e-01, PNorm = 59.6652, GNorm = 1.0870, lr_0 = 3.5358e-04
Loss = 3.9772e-01, PNorm = 59.6649, GNorm = 1.3740, lr_0 = 3.5333e-04
Loss = 4.5071e-01, PNorm = 59.6697, GNorm = 1.8418, lr_0 = 3.5309e-04
Loss = 3.3706e-01, PNorm = 59.6762, GNorm = 1.5792, lr_0 = 3.5285e-04
Loss = 3.8213e-01, PNorm = 59.6805, GNorm = 1.3804, lr_0 = 3.5261e-04
Loss = 3.2451e-01, PNorm = 59.6844, GNorm = 0.8937, lr_0 = 3.5237e-04
Loss = 3.7288e-01, PNorm = 59.6858, GNorm = 1.8915, lr_0 = 3.5212e-04
Loss = 3.9772e-01, PNorm = 59.6947, GNorm = 1.6448, lr_0 = 3.5188e-04
Loss = 4.0008e-01, PNorm = 59.7044, GNorm = 1.4874, lr_0 = 3.5164e-04
Loss = 4.0574e-01, PNorm = 59.7090, GNorm = 1.4678, lr_0 = 3.5140e-04
Loss = 4.3181e-01, PNorm = 59.7165, GNorm = 1.4033, lr_0 = 3.5116e-04
Loss = 3.8918e-01, PNorm = 59.7214, GNorm = 1.4073, lr_0 = 3.5092e-04
Loss = 4.0813e-01, PNorm = 59.7232, GNorm = 1.0058, lr_0 = 3.5068e-04
Loss = 4.4414e-01, PNorm = 59.7314, GNorm = 1.1577, lr_0 = 3.5044e-04
Loss = 3.9698e-01, PNorm = 59.7298, GNorm = 1.1726, lr_0 = 3.5020e-04
Loss = 3.7685e-01, PNorm = 59.7326, GNorm = 1.0890, lr_0 = 3.4996e-04
Loss = 3.9051e-01, PNorm = 59.7402, GNorm = 1.8427, lr_0 = 3.4972e-04
Loss = 3.5843e-01, PNorm = 59.7425, GNorm = 1.2973, lr_0 = 3.4948e-04
Loss = 3.4696e-01, PNorm = 59.7547, GNorm = 0.9455, lr_0 = 3.4924e-04
Loss = 3.2308e-01, PNorm = 59.7571, GNorm = 1.2183, lr_0 = 3.4900e-04
Loss = 3.6993e-01, PNorm = 59.7644, GNorm = 1.1867, lr_0 = 3.4876e-04
Loss = 3.9172e-01, PNorm = 59.7759, GNorm = 2.6612, lr_0 = 3.4852e-04
Loss = 3.6058e-01, PNorm = 59.7842, GNorm = 1.4113, lr_0 = 3.4828e-04
Loss = 4.5821e-01, PNorm = 59.7885, GNorm = 1.3661, lr_0 = 3.4805e-04
Loss = 3.7239e-01, PNorm = 59.7974, GNorm = 1.2814, lr_0 = 3.4781e-04
Loss = 3.8382e-01, PNorm = 59.8017, GNorm = 0.8392, lr_0 = 3.4757e-04
Loss = 3.9249e-01, PNorm = 59.8027, GNorm = 0.7580, lr_0 = 3.4733e-04
Loss = 4.4143e-01, PNorm = 59.8026, GNorm = 1.1392, lr_0 = 3.4709e-04
Loss = 3.5924e-01, PNorm = 59.8083, GNorm = 1.4172, lr_0 = 3.4686e-04
Loss = 4.0471e-01, PNorm = 59.8129, GNorm = 1.2447, lr_0 = 3.4662e-04
Loss = 3.9502e-01, PNorm = 59.8194, GNorm = 1.6806, lr_0 = 3.4638e-04
Loss = 3.5215e-01, PNorm = 59.8208, GNorm = 1.1657, lr_0 = 3.4614e-04
Loss = 4.2359e-01, PNorm = 59.8261, GNorm = 1.8294, lr_0 = 3.4591e-04
Loss = 3.3130e-01, PNorm = 59.8343, GNorm = 1.3473, lr_0 = 3.4567e-04
Loss = 3.2415e-01, PNorm = 59.8393, GNorm = 1.2302, lr_0 = 3.4543e-04
Loss = 3.6078e-01, PNorm = 59.8416, GNorm = 0.9431, lr_0 = 3.4520e-04
Loss = 3.6253e-01, PNorm = 59.8466, GNorm = 1.2976, lr_0 = 3.4496e-04
Loss = 3.7254e-01, PNorm = 59.8563, GNorm = 1.5220, lr_0 = 3.4472e-04
Loss = 3.6233e-01, PNorm = 59.8600, GNorm = 1.4351, lr_0 = 3.4449e-04
Loss = 3.7711e-01, PNorm = 59.8680, GNorm = 1.2947, lr_0 = 3.4425e-04
Loss = 4.8167e-01, PNorm = 59.8693, GNorm = 1.3347, lr_0 = 3.4402e-04
Loss = 3.9392e-01, PNorm = 59.8768, GNorm = 1.0587, lr_0 = 3.4378e-04
Loss = 3.7107e-01, PNorm = 59.8809, GNorm = 1.6537, lr_0 = 3.4354e-04
Loss = 4.0033e-01, PNorm = 59.8805, GNorm = 1.3674, lr_0 = 3.4331e-04
Validation mae = 0.113506
Epoch 15
Loss = 3.5016e-01, PNorm = 59.8816, GNorm = 0.9309, lr_0 = 3.4307e-04
Loss = 3.7410e-01, PNorm = 59.8871, GNorm = 1.7192, lr_0 = 3.4284e-04
Loss = 3.9188e-01, PNorm = 59.8930, GNorm = 1.5528, lr_0 = 3.4260e-04
Loss = 3.8930e-01, PNorm = 59.9013, GNorm = 1.4487, lr_0 = 3.4237e-04
Loss = 3.7136e-01, PNorm = 59.9063, GNorm = 1.5349, lr_0 = 3.4213e-04
Loss = 3.4250e-01, PNorm = 59.9098, GNorm = 1.8517, lr_0 = 3.4190e-04
Loss = 3.9878e-01, PNorm = 59.9110, GNorm = 1.5396, lr_0 = 3.4167e-04
Loss = 4.3270e-01, PNorm = 59.9155, GNorm = 1.5870, lr_0 = 3.4143e-04
Loss = 3.6764e-01, PNorm = 59.9211, GNorm = 1.2962, lr_0 = 3.4120e-04
Loss = 4.0452e-01, PNorm = 59.9228, GNorm = 1.8326, lr_0 = 3.4096e-04
Loss = 3.8408e-01, PNorm = 59.9257, GNorm = 1.3251, lr_0 = 3.4073e-04
Loss = 3.5110e-01, PNorm = 59.9284, GNorm = 1.4756, lr_0 = 3.4050e-04
Loss = 3.8215e-01, PNorm = 59.9330, GNorm = 1.5943, lr_0 = 3.4026e-04
Loss = 4.1701e-01, PNorm = 59.9378, GNorm = 1.3127, lr_0 = 3.4003e-04
Loss = 3.6689e-01, PNorm = 59.9438, GNorm = 1.2356, lr_0 = 3.3980e-04
Loss = 3.6325e-01, PNorm = 59.9459, GNorm = 1.0484, lr_0 = 3.3956e-04
Loss = 4.0480e-01, PNorm = 59.9478, GNorm = 2.1228, lr_0 = 3.3933e-04
Loss = 3.5318e-01, PNorm = 59.9532, GNorm = 1.1224, lr_0 = 3.3910e-04
Loss = 3.8454e-01, PNorm = 59.9604, GNorm = 1.1473, lr_0 = 3.3887e-04
Loss = 3.8532e-01, PNorm = 59.9698, GNorm = 1.2258, lr_0 = 3.3864e-04
Loss = 3.2049e-01, PNorm = 59.9767, GNorm = 1.4514, lr_0 = 3.3840e-04
Loss = 3.7510e-01, PNorm = 59.9799, GNorm = 1.7035, lr_0 = 3.3817e-04
Loss = 4.2093e-01, PNorm = 59.9864, GNorm = 1.2755, lr_0 = 3.3794e-04
Loss = 3.3556e-01, PNorm = 59.9897, GNorm = 1.3944, lr_0 = 3.3771e-04
Loss = 3.8056e-01, PNorm = 59.9918, GNorm = 1.6511, lr_0 = 3.3748e-04
Loss = 4.2888e-01, PNorm = 59.9947, GNorm = 1.6809, lr_0 = 3.3725e-04
Loss = 3.8434e-01, PNorm = 60.0025, GNorm = 1.5381, lr_0 = 3.3701e-04
Loss = 4.0064e-01, PNorm = 60.0077, GNorm = 1.3872, lr_0 = 3.3678e-04
Loss = 3.2075e-01, PNorm = 60.0121, GNorm = 1.0977, lr_0 = 3.3655e-04
Loss = 3.4084e-01, PNorm = 60.0210, GNorm = 2.0871, lr_0 = 3.3632e-04
Loss = 4.2318e-01, PNorm = 60.0210, GNorm = 1.4363, lr_0 = 3.3609e-04
Loss = 4.7937e-01, PNorm = 60.0259, GNorm = 1.5414, lr_0 = 3.3586e-04
Loss = 4.0870e-01, PNorm = 60.0360, GNorm = 1.3645, lr_0 = 3.3563e-04
Loss = 4.0429e-01, PNorm = 60.0410, GNorm = 1.0429, lr_0 = 3.3540e-04
Loss = 3.8875e-01, PNorm = 60.0457, GNorm = 1.5028, lr_0 = 3.3517e-04
Loss = 4.1295e-01, PNorm = 60.0480, GNorm = 1.7014, lr_0 = 3.3494e-04
Loss = 3.5775e-01, PNorm = 60.0526, GNorm = 1.1055, lr_0 = 3.3471e-04
Loss = 4.0740e-01, PNorm = 60.0581, GNorm = 1.2102, lr_0 = 3.3448e-04
Loss = 3.5743e-01, PNorm = 60.0657, GNorm = 1.2257, lr_0 = 3.3425e-04
Loss = 3.6533e-01, PNorm = 60.0683, GNorm = 1.0202, lr_0 = 3.3403e-04
Loss = 3.4462e-01, PNorm = 60.0725, GNorm = 1.2438, lr_0 = 3.3380e-04
Loss = 3.4652e-01, PNorm = 60.0748, GNorm = 1.7009, lr_0 = 3.3357e-04
Loss = 3.9347e-01, PNorm = 60.0809, GNorm = 1.4370, lr_0 = 3.3334e-04
Loss = 4.4830e-01, PNorm = 60.0828, GNorm = 1.3891, lr_0 = 3.3311e-04
Loss = 4.0816e-01, PNorm = 60.0852, GNorm = 1.4353, lr_0 = 3.3288e-04
Loss = 4.0947e-01, PNorm = 60.0903, GNorm = 0.9989, lr_0 = 3.3265e-04
Loss = 3.8748e-01, PNorm = 60.0916, GNorm = 1.3916, lr_0 = 3.3243e-04
Loss = 4.1733e-01, PNorm = 60.0923, GNorm = 1.7580, lr_0 = 3.3220e-04
Loss = 3.5791e-01, PNorm = 60.0976, GNorm = 1.4874, lr_0 = 3.3197e-04
Loss = 3.9612e-01, PNorm = 60.1028, GNorm = 1.7709, lr_0 = 3.3174e-04
Loss = 3.6137e-01, PNorm = 60.1134, GNorm = 1.2310, lr_0 = 3.3152e-04
Loss = 4.1212e-01, PNorm = 60.1180, GNorm = 1.4610, lr_0 = 3.3129e-04
Loss = 4.3292e-01, PNorm = 60.1241, GNorm = 1.6841, lr_0 = 3.3106e-04
Loss = 4.2412e-01, PNorm = 60.1275, GNorm = 1.3074, lr_0 = 3.3084e-04
Loss = 3.7552e-01, PNorm = 60.1317, GNorm = 1.3354, lr_0 = 3.3061e-04
Loss = 3.2934e-01, PNorm = 60.1401, GNorm = 1.2330, lr_0 = 3.3038e-04
Loss = 3.9705e-01, PNorm = 60.1446, GNorm = 1.6009, lr_0 = 3.3016e-04
Loss = 4.2224e-01, PNorm = 60.1487, GNorm = 1.4651, lr_0 = 3.2993e-04
Loss = 3.9616e-01, PNorm = 60.1519, GNorm = 1.0920, lr_0 = 3.2970e-04
Loss = 3.4957e-01, PNorm = 60.1569, GNorm = 1.0213, lr_0 = 3.2948e-04
Loss = 3.2122e-01, PNorm = 60.1632, GNorm = 1.3619, lr_0 = 3.2925e-04
Loss = 3.5333e-01, PNorm = 60.1648, GNorm = 1.2779, lr_0 = 3.2903e-04
Loss = 3.8179e-01, PNorm = 60.1698, GNorm = 1.1726, lr_0 = 3.2880e-04
Loss = 3.7973e-01, PNorm = 60.1733, GNorm = 2.0380, lr_0 = 3.2858e-04
Loss = 3.9628e-01, PNorm = 60.1782, GNorm = 1.4106, lr_0 = 3.2835e-04
Loss = 3.7086e-01, PNorm = 60.1803, GNorm = 1.2633, lr_0 = 3.2813e-04
Loss = 4.0929e-01, PNorm = 60.1917, GNorm = 1.2159, lr_0 = 3.2790e-04
Loss = 3.4624e-01, PNorm = 60.2001, GNorm = 1.5677, lr_0 = 3.2768e-04
Loss = 4.0854e-01, PNorm = 60.2030, GNorm = 2.4078, lr_0 = 3.2745e-04
Loss = 3.5628e-01, PNorm = 60.2059, GNorm = 1.4455, lr_0 = 3.2723e-04
Loss = 3.5049e-01, PNorm = 60.2070, GNorm = 1.4223, lr_0 = 3.2700e-04
Loss = 3.4351e-01, PNorm = 60.2073, GNorm = 1.2126, lr_0 = 3.2678e-04
Loss = 3.5325e-01, PNorm = 60.2126, GNorm = 1.3835, lr_0 = 3.2656e-04
Loss = 3.7722e-01, PNorm = 60.2188, GNorm = 1.1855, lr_0 = 3.2633e-04
Loss = 4.0950e-01, PNorm = 60.2224, GNorm = 1.4224, lr_0 = 3.2611e-04
Loss = 3.6934e-01, PNorm = 60.2272, GNorm = 0.9210, lr_0 = 3.2589e-04
Loss = 4.4436e-01, PNorm = 60.2335, GNorm = 1.6986, lr_0 = 3.2566e-04
Loss = 3.9159e-01, PNorm = 60.2391, GNorm = 1.5524, lr_0 = 3.2544e-04
Loss = 3.8559e-01, PNorm = 60.2432, GNorm = 0.9592, lr_0 = 3.2522e-04
Loss = 4.3844e-01, PNorm = 60.2444, GNorm = 2.0558, lr_0 = 3.2499e-04
Loss = 3.8896e-01, PNorm = 60.2437, GNorm = 1.6560, lr_0 = 3.2477e-04
Loss = 3.5278e-01, PNorm = 60.2515, GNorm = 1.2220, lr_0 = 3.2455e-04
Loss = 3.7288e-01, PNorm = 60.2589, GNorm = 1.6378, lr_0 = 3.2433e-04
Loss = 4.0103e-01, PNorm = 60.2643, GNorm = 1.5065, lr_0 = 3.2410e-04
Loss = 3.5690e-01, PNorm = 60.2689, GNorm = 1.2258, lr_0 = 3.2388e-04
Loss = 3.6319e-01, PNorm = 60.2774, GNorm = 1.7183, lr_0 = 3.2366e-04
Loss = 3.6231e-01, PNorm = 60.2799, GNorm = 1.0975, lr_0 = 3.2344e-04
Loss = 4.3170e-01, PNorm = 60.2840, GNorm = 1.6540, lr_0 = 3.2322e-04
Loss = 3.8740e-01, PNorm = 60.2892, GNorm = 1.6910, lr_0 = 3.2300e-04
Loss = 3.5330e-01, PNorm = 60.2954, GNorm = 1.4388, lr_0 = 3.2277e-04
Loss = 3.8957e-01, PNorm = 60.2989, GNorm = 1.2821, lr_0 = 3.2255e-04
Loss = 3.5826e-01, PNorm = 60.3042, GNorm = 1.4282, lr_0 = 3.2233e-04
Loss = 3.8164e-01, PNorm = 60.3047, GNorm = 1.1602, lr_0 = 3.2211e-04
Loss = 3.6545e-01, PNorm = 60.3095, GNorm = 1.7302, lr_0 = 3.2189e-04
Loss = 3.9000e-01, PNorm = 60.3154, GNorm = 1.1182, lr_0 = 3.2167e-04
Loss = 3.2131e-01, PNorm = 60.3195, GNorm = 1.0516, lr_0 = 3.2145e-04
Loss = 3.7576e-01, PNorm = 60.3230, GNorm = 1.9587, lr_0 = 3.2123e-04
Loss = 4.0764e-01, PNorm = 60.3281, GNorm = 1.2487, lr_0 = 3.2101e-04
Loss = 4.1106e-01, PNorm = 60.3317, GNorm = 1.3606, lr_0 = 3.2079e-04
Loss = 3.5508e-01, PNorm = 60.3324, GNorm = 1.4690, lr_0 = 3.2057e-04
Loss = 3.5286e-01, PNorm = 60.3389, GNorm = 0.9457, lr_0 = 3.2035e-04
Loss = 4.1803e-01, PNorm = 60.3417, GNorm = 1.6679, lr_0 = 3.2013e-04
Loss = 4.2067e-01, PNorm = 60.3501, GNorm = 1.5854, lr_0 = 3.1991e-04
Loss = 3.4481e-01, PNorm = 60.3581, GNorm = 1.2724, lr_0 = 3.1969e-04
Loss = 3.3674e-01, PNorm = 60.3586, GNorm = 2.1258, lr_0 = 3.1947e-04
Loss = 3.9604e-01, PNorm = 60.3601, GNorm = 1.0381, lr_0 = 3.1925e-04
Loss = 3.9374e-01, PNorm = 60.3611, GNorm = 1.4813, lr_0 = 3.1904e-04
Loss = 3.7829e-01, PNorm = 60.3651, GNorm = 2.4503, lr_0 = 3.1882e-04
Loss = 3.1539e-01, PNorm = 60.3626, GNorm = 1.2405, lr_0 = 3.1860e-04
Loss = 3.8762e-01, PNorm = 60.3670, GNorm = 1.8990, lr_0 = 3.1838e-04
Loss = 3.8924e-01, PNorm = 60.3711, GNorm = 1.4243, lr_0 = 3.1816e-04
Loss = 4.2671e-01, PNorm = 60.3746, GNorm = 1.1082, lr_0 = 3.1794e-04
Loss = 3.3756e-01, PNorm = 60.3761, GNorm = 1.9112, lr_0 = 3.1773e-04
Loss = 4.3471e-01, PNorm = 60.3798, GNorm = 2.0275, lr_0 = 3.1751e-04
Loss = 3.3718e-01, PNorm = 60.3866, GNorm = 1.5352, lr_0 = 3.1729e-04
Loss = 4.0437e-01, PNorm = 60.3904, GNorm = 1.5995, lr_0 = 3.1707e-04
Loss = 4.1050e-01, PNorm = 60.3964, GNorm = 1.5827, lr_0 = 3.1686e-04
Loss = 4.0101e-01, PNorm = 60.3981, GNorm = 1.1042, lr_0 = 3.1664e-04
Loss = 3.9670e-01, PNorm = 60.4009, GNorm = 1.3572, lr_0 = 3.1642e-04
Loss = 3.7335e-01, PNorm = 60.4062, GNorm = 1.3966, lr_0 = 3.1621e-04
Validation mae = 0.112956
Epoch 16
Loss = 4.0172e-01, PNorm = 60.4089, GNorm = 1.2527, lr_0 = 3.1599e-04
Loss = 3.7003e-01, PNorm = 60.4130, GNorm = 1.3451, lr_0 = 3.1577e-04
Loss = 3.8942e-01, PNorm = 60.4198, GNorm = 1.2421, lr_0 = 3.1556e-04
Loss = 3.8016e-01, PNorm = 60.4233, GNorm = 1.2212, lr_0 = 3.1534e-04
Loss = 3.8369e-01, PNorm = 60.4300, GNorm = 1.1379, lr_0 = 3.1512e-04
Loss = 4.2810e-01, PNorm = 60.4328, GNorm = 1.2883, lr_0 = 3.1491e-04
Loss = 3.4262e-01, PNorm = 60.4382, GNorm = 1.3412, lr_0 = 3.1469e-04
Loss = 3.8842e-01, PNorm = 60.4432, GNorm = 1.3016, lr_0 = 3.1448e-04
Loss = 3.4130e-01, PNorm = 60.4464, GNorm = 1.0556, lr_0 = 3.1426e-04
Loss = 4.4274e-01, PNorm = 60.4498, GNorm = 2.0091, lr_0 = 3.1405e-04
Loss = 3.6629e-01, PNorm = 60.4527, GNorm = 1.3521, lr_0 = 3.1383e-04
Loss = 4.3386e-01, PNorm = 60.4579, GNorm = 1.2625, lr_0 = 3.1362e-04
Loss = 3.8534e-01, PNorm = 60.4614, GNorm = 1.9735, lr_0 = 3.1340e-04
Loss = 3.7504e-01, PNorm = 60.4702, GNorm = 1.2955, lr_0 = 3.1319e-04
Loss = 3.9846e-01, PNorm = 60.4740, GNorm = 1.9897, lr_0 = 3.1297e-04
Loss = 4.0743e-01, PNorm = 60.4774, GNorm = 1.1310, lr_0 = 3.1276e-04
Loss = 3.8687e-01, PNorm = 60.4828, GNorm = 1.2843, lr_0 = 3.1254e-04
Loss = 4.0077e-01, PNorm = 60.4859, GNorm = 1.3677, lr_0 = 3.1233e-04
Loss = 4.5581e-01, PNorm = 60.4879, GNorm = 1.4351, lr_0 = 3.1212e-04
Loss = 3.2940e-01, PNorm = 60.4956, GNorm = 1.3764, lr_0 = 3.1190e-04
Loss = 4.0225e-01, PNorm = 60.5003, GNorm = 1.2265, lr_0 = 3.1169e-04
Loss = 3.4174e-01, PNorm = 60.5081, GNorm = 1.0764, lr_0 = 3.1147e-04
Loss = 3.9280e-01, PNorm = 60.5084, GNorm = 1.7556, lr_0 = 3.1126e-04
Loss = 3.9479e-01, PNorm = 60.5134, GNorm = 1.3005, lr_0 = 3.1105e-04
Loss = 3.6981e-01, PNorm = 60.5178, GNorm = 1.4337, lr_0 = 3.1083e-04
Loss = 3.7219e-01, PNorm = 60.5188, GNorm = 2.2954, lr_0 = 3.1062e-04
Loss = 3.8596e-01, PNorm = 60.5228, GNorm = 1.7980, lr_0 = 3.1041e-04
Loss = 3.8363e-01, PNorm = 60.5260, GNorm = 0.9652, lr_0 = 3.1020e-04
Loss = 4.0342e-01, PNorm = 60.5299, GNorm = 1.1177, lr_0 = 3.0998e-04
Loss = 4.4771e-01, PNorm = 60.5333, GNorm = 1.7145, lr_0 = 3.0977e-04
Loss = 3.5699e-01, PNorm = 60.5378, GNorm = 1.4015, lr_0 = 3.0956e-04
Loss = 3.7617e-01, PNorm = 60.5403, GNorm = 1.5926, lr_0 = 3.0935e-04
Loss = 3.7983e-01, PNorm = 60.5451, GNorm = 1.4152, lr_0 = 3.0914e-04
Loss = 3.3699e-01, PNorm = 60.5478, GNorm = 1.0552, lr_0 = 3.0892e-04
Loss = 3.8476e-01, PNorm = 60.5519, GNorm = 1.1918, lr_0 = 3.0871e-04
Loss = 3.5409e-01, PNorm = 60.5547, GNorm = 1.2358, lr_0 = 3.0850e-04
Loss = 3.4525e-01, PNorm = 60.5591, GNorm = 0.8908, lr_0 = 3.0829e-04
Loss = 3.6381e-01, PNorm = 60.5669, GNorm = 1.1117, lr_0 = 3.0808e-04
Loss = 4.0624e-01, PNorm = 60.5717, GNorm = 1.3115, lr_0 = 3.0787e-04
Loss = 3.9810e-01, PNorm = 60.5777, GNorm = 1.6160, lr_0 = 3.0766e-04
Loss = 3.8686e-01, PNorm = 60.5795, GNorm = 1.6458, lr_0 = 3.0745e-04
Loss = 4.2729e-01, PNorm = 60.5817, GNorm = 1.9235, lr_0 = 3.0723e-04
Loss = 3.9332e-01, PNorm = 60.5861, GNorm = 1.2925, lr_0 = 3.0702e-04
Loss = 3.6107e-01, PNorm = 60.5902, GNorm = 1.3317, lr_0 = 3.0681e-04
Loss = 3.6897e-01, PNorm = 60.5948, GNorm = 1.1509, lr_0 = 3.0660e-04
Loss = 3.9754e-01, PNorm = 60.6000, GNorm = 1.1864, lr_0 = 3.0639e-04
Loss = 3.8231e-01, PNorm = 60.6063, GNorm = 1.2931, lr_0 = 3.0618e-04
Loss = 3.7324e-01, PNorm = 60.6098, GNorm = 1.7585, lr_0 = 3.0597e-04
Loss = 3.4358e-01, PNorm = 60.6131, GNorm = 1.2615, lr_0 = 3.0576e-04
Loss = 3.2982e-01, PNorm = 60.6195, GNorm = 1.2809, lr_0 = 3.0555e-04
Loss = 3.7184e-01, PNorm = 60.6286, GNorm = 1.0717, lr_0 = 3.0535e-04
Loss = 3.3947e-01, PNorm = 60.6329, GNorm = 1.6259, lr_0 = 3.0514e-04
Loss = 3.7852e-01, PNorm = 60.6351, GNorm = 1.1369, lr_0 = 3.0493e-04
Loss = 3.9890e-01, PNorm = 60.6417, GNorm = 1.5144, lr_0 = 3.0472e-04
Loss = 4.4907e-01, PNorm = 60.6462, GNorm = 2.2164, lr_0 = 3.0451e-04
Loss = 4.0003e-01, PNorm = 60.6481, GNorm = 3.2298, lr_0 = 3.0430e-04
Loss = 4.4998e-01, PNorm = 60.6496, GNorm = 1.2092, lr_0 = 3.0409e-04
Loss = 3.5752e-01, PNorm = 60.6582, GNorm = 2.0092, lr_0 = 3.0388e-04
Loss = 4.2350e-01, PNorm = 60.6596, GNorm = 1.2001, lr_0 = 3.0368e-04
Loss = 4.3444e-01, PNorm = 60.6657, GNorm = 1.2528, lr_0 = 3.0347e-04
Loss = 3.7581e-01, PNorm = 60.6688, GNorm = 1.7294, lr_0 = 3.0326e-04
Loss = 3.4438e-01, PNorm = 60.6717, GNorm = 1.1076, lr_0 = 3.0305e-04
Loss = 3.4259e-01, PNorm = 60.6787, GNorm = 1.2111, lr_0 = 3.0284e-04
Loss = 4.1034e-01, PNorm = 60.6791, GNorm = 1.2501, lr_0 = 3.0264e-04
Loss = 4.2118e-01, PNorm = 60.6851, GNorm = 1.9154, lr_0 = 3.0243e-04
Loss = 3.6426e-01, PNorm = 60.6943, GNorm = 1.2588, lr_0 = 3.0222e-04
Loss = 4.2677e-01, PNorm = 60.6972, GNorm = 1.3697, lr_0 = 3.0202e-04
Loss = 3.4820e-01, PNorm = 60.7017, GNorm = 1.6559, lr_0 = 3.0181e-04
Loss = 4.0123e-01, PNorm = 60.7083, GNorm = 1.1989, lr_0 = 3.0160e-04
Loss = 3.9579e-01, PNorm = 60.7113, GNorm = 1.6611, lr_0 = 3.0140e-04
Loss = 3.8447e-01, PNorm = 60.7133, GNorm = 1.3021, lr_0 = 3.0119e-04
Loss = 4.0787e-01, PNorm = 60.7159, GNorm = 1.6760, lr_0 = 3.0098e-04
Loss = 3.7746e-01, PNorm = 60.7184, GNorm = 1.1169, lr_0 = 3.0078e-04
Loss = 4.1463e-01, PNorm = 60.7219, GNorm = 1.3775, lr_0 = 3.0057e-04
Loss = 3.4617e-01, PNorm = 60.7250, GNorm = 1.4953, lr_0 = 3.0036e-04
Loss = 3.5475e-01, PNorm = 60.7286, GNorm = 2.0727, lr_0 = 3.0016e-04
Loss = 3.8698e-01, PNorm = 60.7310, GNorm = 1.4607, lr_0 = 2.9995e-04
Loss = 3.5319e-01, PNorm = 60.7380, GNorm = 1.3771, lr_0 = 2.9975e-04
Loss = 4.0677e-01, PNorm = 60.7398, GNorm = 1.3759, lr_0 = 2.9954e-04
Loss = 3.7886e-01, PNorm = 60.7403, GNorm = 1.4708, lr_0 = 2.9934e-04
Loss = 3.7983e-01, PNorm = 60.7441, GNorm = 1.4761, lr_0 = 2.9913e-04
Loss = 3.6322e-01, PNorm = 60.7434, GNorm = 1.5076, lr_0 = 2.9893e-04
Loss = 3.3362e-01, PNorm = 60.7540, GNorm = 1.0768, lr_0 = 2.9872e-04
Loss = 4.4451e-01, PNorm = 60.7582, GNorm = 1.8233, lr_0 = 2.9852e-04
Loss = 3.5125e-01, PNorm = 60.7624, GNorm = 1.5269, lr_0 = 2.9831e-04
Loss = 3.6969e-01, PNorm = 60.7621, GNorm = 1.4664, lr_0 = 2.9811e-04
Loss = 3.5735e-01, PNorm = 60.7648, GNorm = 1.3672, lr_0 = 2.9790e-04
Loss = 4.0494e-01, PNorm = 60.7652, GNorm = 1.1250, lr_0 = 2.9770e-04
Loss = 3.3188e-01, PNorm = 60.7671, GNorm = 1.0929, lr_0 = 2.9750e-04
Loss = 4.0577e-01, PNorm = 60.7723, GNorm = 1.1098, lr_0 = 2.9729e-04
Loss = 4.0509e-01, PNorm = 60.7750, GNorm = 1.2092, lr_0 = 2.9709e-04
Loss = 3.7144e-01, PNorm = 60.7834, GNorm = 1.5616, lr_0 = 2.9689e-04
Loss = 3.1931e-01, PNorm = 60.7927, GNorm = 1.1912, lr_0 = 2.9668e-04
Loss = 3.5496e-01, PNorm = 60.7946, GNorm = 1.6602, lr_0 = 2.9648e-04
Loss = 4.1438e-01, PNorm = 60.7996, GNorm = 0.9353, lr_0 = 2.9628e-04
Loss = 3.7299e-01, PNorm = 60.8033, GNorm = 1.4907, lr_0 = 2.9607e-04
Loss = 3.9986e-01, PNorm = 60.8036, GNorm = 1.8241, lr_0 = 2.9587e-04
Loss = 3.4477e-01, PNorm = 60.8047, GNorm = 1.5026, lr_0 = 2.9567e-04
Loss = 3.1660e-01, PNorm = 60.8068, GNorm = 1.0354, lr_0 = 2.9546e-04
Loss = 4.3368e-01, PNorm = 60.8114, GNorm = 1.3310, lr_0 = 2.9526e-04
Loss = 4.0735e-01, PNorm = 60.8147, GNorm = 1.7784, lr_0 = 2.9506e-04
Loss = 3.4186e-01, PNorm = 60.8206, GNorm = 1.2673, lr_0 = 2.9486e-04
Loss = 3.8673e-01, PNorm = 60.8238, GNorm = 1.3062, lr_0 = 2.9466e-04
Loss = 3.5962e-01, PNorm = 60.8271, GNorm = 1.3852, lr_0 = 2.9445e-04
Loss = 3.7421e-01, PNorm = 60.8330, GNorm = 1.2357, lr_0 = 2.9425e-04
Loss = 3.8526e-01, PNorm = 60.8391, GNorm = 1.3231, lr_0 = 2.9405e-04
Loss = 3.9191e-01, PNorm = 60.8416, GNorm = 1.2774, lr_0 = 2.9385e-04
Loss = 3.3148e-01, PNorm = 60.8422, GNorm = 0.9685, lr_0 = 2.9365e-04
Loss = 3.7321e-01, PNorm = 60.8423, GNorm = 1.4299, lr_0 = 2.9345e-04
Loss = 3.5153e-01, PNorm = 60.8418, GNorm = 1.9946, lr_0 = 2.9325e-04
Loss = 3.9166e-01, PNorm = 60.8467, GNorm = 1.5169, lr_0 = 2.9305e-04
Loss = 4.1114e-01, PNorm = 60.8479, GNorm = 1.0519, lr_0 = 2.9284e-04
Loss = 3.5665e-01, PNorm = 60.8544, GNorm = 1.5404, lr_0 = 2.9264e-04
Loss = 4.0099e-01, PNorm = 60.8581, GNorm = 1.5112, lr_0 = 2.9244e-04
Loss = 3.6515e-01, PNorm = 60.8610, GNorm = 1.5363, lr_0 = 2.9224e-04
Loss = 3.1330e-01, PNorm = 60.8611, GNorm = 1.8295, lr_0 = 2.9204e-04
Loss = 4.3439e-01, PNorm = 60.8636, GNorm = 1.3669, lr_0 = 2.9184e-04
Loss = 3.4710e-01, PNorm = 60.8633, GNorm = 1.3888, lr_0 = 2.9164e-04
Loss = 3.9103e-01, PNorm = 60.8657, GNorm = 1.2793, lr_0 = 2.9144e-04
Loss = 3.9382e-01, PNorm = 60.8666, GNorm = 1.0752, lr_0 = 2.9124e-04
Validation mae = 0.112662
Epoch 17
Loss = 3.8922e-01, PNorm = 60.8700, GNorm = 1.3714, lr_0 = 2.9104e-04
Loss = 3.2906e-01, PNorm = 60.8721, GNorm = 1.3526, lr_0 = 2.9084e-04
Loss = 3.7388e-01, PNorm = 60.8774, GNorm = 2.5885, lr_0 = 2.9065e-04
Loss = 3.8435e-01, PNorm = 60.8802, GNorm = 2.3635, lr_0 = 2.9045e-04
Loss = 3.9159e-01, PNorm = 60.8921, GNorm = 1.2042, lr_0 = 2.9025e-04
Loss = 3.7510e-01, PNorm = 60.8945, GNorm = 1.4416, lr_0 = 2.9005e-04
Loss = 3.5910e-01, PNorm = 60.8982, GNorm = 1.3788, lr_0 = 2.8985e-04
Loss = 3.3476e-01, PNorm = 60.8984, GNorm = 1.2563, lr_0 = 2.8965e-04
Loss = 3.4832e-01, PNorm = 60.9004, GNorm = 1.6510, lr_0 = 2.8945e-04
Loss = 3.8971e-01, PNorm = 60.9083, GNorm = 1.9525, lr_0 = 2.8925e-04
Loss = 3.8501e-01, PNorm = 60.9096, GNorm = 1.4521, lr_0 = 2.8906e-04
Loss = 4.4541e-01, PNorm = 60.9166, GNorm = 1.6514, lr_0 = 2.8886e-04
Loss = 3.6616e-01, PNorm = 60.9212, GNorm = 1.6734, lr_0 = 2.8866e-04
Loss = 3.4878e-01, PNorm = 60.9233, GNorm = 1.5419, lr_0 = 2.8846e-04
Loss = 3.6602e-01, PNorm = 60.9271, GNorm = 1.3118, lr_0 = 2.8826e-04
Loss = 4.3846e-01, PNorm = 60.9316, GNorm = 1.2973, lr_0 = 2.8807e-04
Loss = 3.8212e-01, PNorm = 60.9313, GNorm = 1.4786, lr_0 = 2.8787e-04
Loss = 3.0834e-01, PNorm = 60.9375, GNorm = 1.1740, lr_0 = 2.8767e-04
Loss = 4.5322e-01, PNorm = 60.9360, GNorm = 1.6510, lr_0 = 2.8748e-04
Loss = 3.6127e-01, PNorm = 60.9389, GNorm = 2.1608, lr_0 = 2.8728e-04
Loss = 3.6279e-01, PNorm = 60.9419, GNorm = 1.1380, lr_0 = 2.8708e-04
Loss = 3.2274e-01, PNorm = 60.9440, GNorm = 1.1312, lr_0 = 2.8689e-04
Loss = 3.1730e-01, PNorm = 60.9473, GNorm = 1.2080, lr_0 = 2.8669e-04
Loss = 3.7883e-01, PNorm = 60.9535, GNorm = 1.2412, lr_0 = 2.8649e-04
Loss = 3.8495e-01, PNorm = 60.9597, GNorm = 1.3639, lr_0 = 2.8630e-04
Loss = 3.4655e-01, PNorm = 60.9607, GNorm = 1.5133, lr_0 = 2.8610e-04
Loss = 3.7795e-01, PNorm = 60.9659, GNorm = 1.8292, lr_0 = 2.8590e-04
Loss = 3.7014e-01, PNorm = 60.9675, GNorm = 1.4005, lr_0 = 2.8571e-04
Loss = 4.3978e-01, PNorm = 60.9700, GNorm = 1.4197, lr_0 = 2.8551e-04
Loss = 3.4036e-01, PNorm = 60.9760, GNorm = 1.1397, lr_0 = 2.8532e-04
Loss = 3.7190e-01, PNorm = 60.9791, GNorm = 1.7524, lr_0 = 2.8512e-04
Loss = 3.6875e-01, PNorm = 60.9797, GNorm = 1.2240, lr_0 = 2.8493e-04
Loss = 4.0300e-01, PNorm = 60.9821, GNorm = 1.5331, lr_0 = 2.8473e-04
Loss = 4.1040e-01, PNorm = 60.9827, GNorm = 2.0157, lr_0 = 2.8454e-04
Loss = 3.5563e-01, PNorm = 60.9860, GNorm = 1.4556, lr_0 = 2.8434e-04
Loss = 4.3214e-01, PNorm = 60.9923, GNorm = 2.3534, lr_0 = 2.8415e-04
Loss = 3.7934e-01, PNorm = 60.9910, GNorm = 1.2963, lr_0 = 2.8395e-04
Loss = 4.1058e-01, PNorm = 60.9921, GNorm = 1.7867, lr_0 = 2.8376e-04
Loss = 3.7236e-01, PNorm = 60.9946, GNorm = 1.4941, lr_0 = 2.8356e-04
Loss = 3.9490e-01, PNorm = 60.9963, GNorm = 1.1994, lr_0 = 2.8337e-04
Loss = 4.1019e-01, PNorm = 61.0037, GNorm = 1.5991, lr_0 = 2.8317e-04
Loss = 3.7212e-01, PNorm = 61.0061, GNorm = 1.6596, lr_0 = 2.8298e-04
Loss = 3.4127e-01, PNorm = 61.0109, GNorm = 1.4013, lr_0 = 2.8279e-04
Loss = 4.0358e-01, PNorm = 61.0152, GNorm = 1.2276, lr_0 = 2.8259e-04
Loss = 3.7741e-01, PNorm = 61.0193, GNorm = 1.7545, lr_0 = 2.8240e-04
Loss = 3.5356e-01, PNorm = 61.0195, GNorm = 1.1600, lr_0 = 2.8221e-04
Loss = 3.6927e-01, PNorm = 61.0234, GNorm = 1.5925, lr_0 = 2.8201e-04
Loss = 4.2369e-01, PNorm = 61.0293, GNorm = 2.9984, lr_0 = 2.8182e-04
Loss = 4.0494e-01, PNorm = 61.0333, GNorm = 1.8662, lr_0 = 2.8163e-04
Loss = 3.4139e-01, PNorm = 61.0354, GNorm = 1.3208, lr_0 = 2.8143e-04
Loss = 3.3073e-01, PNorm = 61.0379, GNorm = 1.2752, lr_0 = 2.8124e-04
Loss = 3.9702e-01, PNorm = 61.0411, GNorm = 1.2982, lr_0 = 2.8105e-04
Loss = 4.1727e-01, PNorm = 61.0457, GNorm = 1.5207, lr_0 = 2.8085e-04
Loss = 4.1842e-01, PNorm = 61.0481, GNorm = 1.8974, lr_0 = 2.8066e-04
Loss = 3.8387e-01, PNorm = 61.0499, GNorm = 3.0749, lr_0 = 2.8047e-04
Loss = 4.3301e-01, PNorm = 61.0563, GNorm = 1.1382, lr_0 = 2.8028e-04
Loss = 3.6724e-01, PNorm = 61.0657, GNorm = 1.4403, lr_0 = 2.8009e-04
Loss = 4.6379e-01, PNorm = 61.0698, GNorm = 1.1536, lr_0 = 2.7989e-04
Loss = 3.8391e-01, PNorm = 61.0748, GNorm = 1.4999, lr_0 = 2.7970e-04
Loss = 3.9046e-01, PNorm = 61.0775, GNorm = 1.5089, lr_0 = 2.7951e-04
Loss = 3.8605e-01, PNorm = 61.0803, GNorm = 1.2072, lr_0 = 2.7932e-04
Loss = 3.5332e-01, PNorm = 61.0826, GNorm = 1.9443, lr_0 = 2.7913e-04
Loss = 3.7378e-01, PNorm = 61.0818, GNorm = 1.6724, lr_0 = 2.7894e-04
Loss = 3.5825e-01, PNorm = 61.0826, GNorm = 1.5451, lr_0 = 2.7875e-04
Loss = 3.7519e-01, PNorm = 61.0835, GNorm = 1.7395, lr_0 = 2.7855e-04
Loss = 4.3053e-01, PNorm = 61.0861, GNorm = 1.2882, lr_0 = 2.7836e-04
Loss = 4.1598e-01, PNorm = 61.0913, GNorm = 1.7876, lr_0 = 2.7817e-04
Loss = 3.9809e-01, PNorm = 61.0950, GNorm = 1.4043, lr_0 = 2.7798e-04
Loss = 4.0549e-01, PNorm = 61.0995, GNorm = 2.4338, lr_0 = 2.7779e-04
Loss = 3.6441e-01, PNorm = 61.1041, GNorm = 0.9819, lr_0 = 2.7760e-04
Loss = 4.3181e-01, PNorm = 61.1085, GNorm = 1.6749, lr_0 = 2.7741e-04
Loss = 3.5370e-01, PNorm = 61.1153, GNorm = 1.8124, lr_0 = 2.7722e-04
Loss = 3.4219e-01, PNorm = 61.1201, GNorm = 1.1617, lr_0 = 2.7703e-04
Loss = 3.7738e-01, PNorm = 61.1238, GNorm = 1.7595, lr_0 = 2.7684e-04
Loss = 3.4138e-01, PNorm = 61.1252, GNorm = 1.5551, lr_0 = 2.7665e-04
Loss = 4.0910e-01, PNorm = 61.1298, GNorm = 1.7510, lr_0 = 2.7646e-04
Loss = 3.7011e-01, PNorm = 61.1340, GNorm = 1.5837, lr_0 = 2.7627e-04
Loss = 3.1791e-01, PNorm = 61.1408, GNorm = 1.2935, lr_0 = 2.7608e-04
Loss = 4.0096e-01, PNorm = 61.1430, GNorm = 1.5227, lr_0 = 2.7590e-04
Loss = 4.0346e-01, PNorm = 61.1451, GNorm = 2.3613, lr_0 = 2.7571e-04
Loss = 3.2687e-01, PNorm = 61.1462, GNorm = 1.8159, lr_0 = 2.7552e-04
Loss = 4.1855e-01, PNorm = 61.1523, GNorm = 1.9802, lr_0 = 2.7533e-04
Loss = 3.9138e-01, PNorm = 61.1513, GNorm = 1.2239, lr_0 = 2.7514e-04
Loss = 3.5572e-01, PNorm = 61.1575, GNorm = 1.9650, lr_0 = 2.7495e-04
Loss = 3.7205e-01, PNorm = 61.1588, GNorm = 1.4590, lr_0 = 2.7476e-04
Loss = 3.9555e-01, PNorm = 61.1620, GNorm = 1.6360, lr_0 = 2.7457e-04
Loss = 3.0810e-01, PNorm = 61.1646, GNorm = 1.2589, lr_0 = 2.7439e-04
Loss = 3.6462e-01, PNorm = 61.1642, GNorm = 1.6351, lr_0 = 2.7420e-04
Loss = 3.8107e-01, PNorm = 61.1681, GNorm = 1.3615, lr_0 = 2.7401e-04
Loss = 3.5821e-01, PNorm = 61.1704, GNorm = 1.1140, lr_0 = 2.7382e-04
Loss = 3.6798e-01, PNorm = 61.1741, GNorm = 0.9231, lr_0 = 2.7364e-04
Loss = 3.8752e-01, PNorm = 61.1792, GNorm = 1.1916, lr_0 = 2.7345e-04
Loss = 3.4843e-01, PNorm = 61.1833, GNorm = 1.2075, lr_0 = 2.7326e-04
Loss = 3.7805e-01, PNorm = 61.1855, GNorm = 1.1217, lr_0 = 2.7307e-04
Loss = 3.2124e-01, PNorm = 61.1877, GNorm = 1.4302, lr_0 = 2.7289e-04
Loss = 4.3308e-01, PNorm = 61.1873, GNorm = 1.3092, lr_0 = 2.7270e-04
Loss = 3.8641e-01, PNorm = 61.1890, GNorm = 1.5273, lr_0 = 2.7251e-04
Loss = 3.5583e-01, PNorm = 61.1917, GNorm = 1.3727, lr_0 = 2.7233e-04
Loss = 4.2418e-01, PNorm = 61.1955, GNorm = 1.6569, lr_0 = 2.7214e-04
Loss = 3.3442e-01, PNorm = 61.2014, GNorm = 0.9366, lr_0 = 2.7195e-04
Loss = 3.6934e-01, PNorm = 61.2036, GNorm = 1.2865, lr_0 = 2.7177e-04
Loss = 3.2662e-01, PNorm = 61.2065, GNorm = 1.0767, lr_0 = 2.7158e-04
Loss = 3.4166e-01, PNorm = 61.2105, GNorm = 1.6908, lr_0 = 2.7139e-04
Loss = 3.6340e-01, PNorm = 61.2108, GNorm = 1.3593, lr_0 = 2.7121e-04
Loss = 3.4411e-01, PNorm = 61.2129, GNorm = 1.4021, lr_0 = 2.7102e-04
Loss = 3.4638e-01, PNorm = 61.2178, GNorm = 1.2423, lr_0 = 2.7084e-04
Loss = 3.6610e-01, PNorm = 61.2187, GNorm = 1.2235, lr_0 = 2.7065e-04
Loss = 3.1162e-01, PNorm = 61.2228, GNorm = 1.3832, lr_0 = 2.7047e-04
Loss = 3.6172e-01, PNorm = 61.2272, GNorm = 1.6053, lr_0 = 2.7028e-04
Loss = 3.6782e-01, PNorm = 61.2314, GNorm = 1.2479, lr_0 = 2.7010e-04
Loss = 4.1029e-01, PNorm = 61.2338, GNorm = 1.2945, lr_0 = 2.6991e-04
Loss = 3.7251e-01, PNorm = 61.2392, GNorm = 1.2898, lr_0 = 2.6973e-04
Loss = 4.0905e-01, PNorm = 61.2428, GNorm = 1.1367, lr_0 = 2.6954e-04
Loss = 3.5055e-01, PNorm = 61.2446, GNorm = 1.4220, lr_0 = 2.6936e-04
Loss = 3.8513e-01, PNorm = 61.2485, GNorm = 0.8675, lr_0 = 2.6917e-04
Loss = 3.3894e-01, PNorm = 61.2507, GNorm = 1.1186, lr_0 = 2.6899e-04
Loss = 4.0069e-01, PNorm = 61.2530, GNorm = 1.1892, lr_0 = 2.6880e-04
Loss = 3.7738e-01, PNorm = 61.2569, GNorm = 1.8728, lr_0 = 2.6862e-04
Loss = 3.9646e-01, PNorm = 61.2592, GNorm = 1.6975, lr_0 = 2.6844e-04
Loss = 3.4472e-01, PNorm = 61.2638, GNorm = 1.3502, lr_0 = 2.6825e-04
Validation mae = 0.111978
Epoch 18
Loss = 3.9146e-01, PNorm = 61.2666, GNorm = 1.2342, lr_0 = 2.6807e-04
Loss = 3.9957e-01, PNorm = 61.2725, GNorm = 1.7054, lr_0 = 2.6788e-04
Loss = 3.2587e-01, PNorm = 61.2763, GNorm = 1.6285, lr_0 = 2.6770e-04
Loss = 3.8021e-01, PNorm = 61.2791, GNorm = 1.1461, lr_0 = 2.6752e-04
Loss = 3.5443e-01, PNorm = 61.2850, GNorm = 1.5462, lr_0 = 2.6733e-04
Loss = 3.4207e-01, PNorm = 61.2856, GNorm = 1.6694, lr_0 = 2.6715e-04
Loss = 3.9206e-01, PNorm = 61.2882, GNorm = 1.3273, lr_0 = 2.6697e-04
Loss = 4.0907e-01, PNorm = 61.2902, GNorm = 1.9921, lr_0 = 2.6678e-04
Loss = 3.5472e-01, PNorm = 61.2941, GNorm = 1.2007, lr_0 = 2.6660e-04
Loss = 3.9866e-01, PNorm = 61.3000, GNorm = 1.4759, lr_0 = 2.6642e-04
Loss = 3.6035e-01, PNorm = 61.3043, GNorm = 1.4202, lr_0 = 2.6624e-04
Loss = 3.8534e-01, PNorm = 61.3062, GNorm = 1.4336, lr_0 = 2.6605e-04
Loss = 3.3536e-01, PNorm = 61.3082, GNorm = 1.6038, lr_0 = 2.6587e-04
Loss = 3.7051e-01, PNorm = 61.3105, GNorm = 1.1963, lr_0 = 2.6569e-04
Loss = 4.5203e-01, PNorm = 61.3121, GNorm = 1.8112, lr_0 = 2.6551e-04
Loss = 3.5337e-01, PNorm = 61.3144, GNorm = 1.0642, lr_0 = 2.6533e-04
Loss = 3.2694e-01, PNorm = 61.3202, GNorm = 1.0066, lr_0 = 2.6514e-04
Loss = 3.4647e-01, PNorm = 61.3239, GNorm = 1.5402, lr_0 = 2.6496e-04
Loss = 3.7703e-01, PNorm = 61.3270, GNorm = 1.2499, lr_0 = 2.6478e-04
Loss = 3.6764e-01, PNorm = 61.3325, GNorm = 1.2726, lr_0 = 2.6460e-04
Loss = 3.6119e-01, PNorm = 61.3334, GNorm = 2.0980, lr_0 = 2.6442e-04
Loss = 3.8056e-01, PNorm = 61.3366, GNorm = 2.0180, lr_0 = 2.6424e-04
Loss = 3.3962e-01, PNorm = 61.3401, GNorm = 1.3283, lr_0 = 2.6406e-04
Loss = 4.2105e-01, PNorm = 61.3419, GNorm = 2.0141, lr_0 = 2.6388e-04
Loss = 2.9553e-01, PNorm = 61.3472, GNorm = 1.5013, lr_0 = 2.6369e-04
Loss = 2.9699e-01, PNorm = 61.3543, GNorm = 1.7056, lr_0 = 2.6351e-04
Loss = 3.2747e-01, PNorm = 61.3583, GNorm = 1.6519, lr_0 = 2.6333e-04
Loss = 3.9030e-01, PNorm = 61.3597, GNorm = 1.4534, lr_0 = 2.6315e-04
Loss = 4.1429e-01, PNorm = 61.3635, GNorm = 1.6033, lr_0 = 2.6297e-04
Loss = 3.7119e-01, PNorm = 61.3679, GNorm = 1.2787, lr_0 = 2.6279e-04
Loss = 3.5332e-01, PNorm = 61.3692, GNorm = 1.3383, lr_0 = 2.6261e-04
Loss = 3.5165e-01, PNorm = 61.3699, GNorm = 1.0746, lr_0 = 2.6243e-04
Loss = 3.9898e-01, PNorm = 61.3712, GNorm = 0.9770, lr_0 = 2.6225e-04
Loss = 3.4847e-01, PNorm = 61.3776, GNorm = 1.1447, lr_0 = 2.6207e-04
Loss = 3.7992e-01, PNorm = 61.3860, GNorm = 1.3431, lr_0 = 2.6189e-04
Loss = 3.7199e-01, PNorm = 61.3857, GNorm = 1.8557, lr_0 = 2.6171e-04
Loss = 3.8294e-01, PNorm = 61.3858, GNorm = 1.1898, lr_0 = 2.6153e-04
Loss = 3.6732e-01, PNorm = 61.3884, GNorm = 1.9220, lr_0 = 2.6136e-04
Loss = 3.9273e-01, PNorm = 61.3917, GNorm = 1.2338, lr_0 = 2.6118e-04
Loss = 3.8029e-01, PNorm = 61.3957, GNorm = 2.1667, lr_0 = 2.6100e-04
Loss = 3.3476e-01, PNorm = 61.4009, GNorm = 1.3724, lr_0 = 2.6082e-04
Loss = 3.7770e-01, PNorm = 61.4050, GNorm = 1.3087, lr_0 = 2.6064e-04
Loss = 4.2324e-01, PNorm = 61.4091, GNorm = 1.1633, lr_0 = 2.6046e-04
Loss = 3.7516e-01, PNorm = 61.4116, GNorm = 1.4025, lr_0 = 2.6028e-04
Loss = 3.5538e-01, PNorm = 61.4136, GNorm = 1.1606, lr_0 = 2.6011e-04
Loss = 3.7177e-01, PNorm = 61.4182, GNorm = 1.7632, lr_0 = 2.5993e-04
Loss = 3.1954e-01, PNorm = 61.4202, GNorm = 0.9827, lr_0 = 2.5975e-04
Loss = 3.3672e-01, PNorm = 61.4180, GNorm = 1.1261, lr_0 = 2.5957e-04
Loss = 3.1129e-01, PNorm = 61.4207, GNorm = 0.9762, lr_0 = 2.5939e-04
Loss = 3.6400e-01, PNorm = 61.4247, GNorm = 1.6183, lr_0 = 2.5922e-04
Loss = 3.6374e-01, PNorm = 61.4294, GNorm = 1.6914, lr_0 = 2.5904e-04
Loss = 3.2923e-01, PNorm = 61.4297, GNorm = 1.0418, lr_0 = 2.5886e-04
Loss = 3.8832e-01, PNorm = 61.4317, GNorm = 1.9806, lr_0 = 2.5868e-04
Loss = 3.8287e-01, PNorm = 61.4342, GNorm = 1.1766, lr_0 = 2.5851e-04
Loss = 4.0669e-01, PNorm = 61.4379, GNorm = 1.4136, lr_0 = 2.5833e-04
Loss = 3.6216e-01, PNorm = 61.4448, GNorm = 1.7821, lr_0 = 2.5815e-04
Loss = 3.8039e-01, PNorm = 61.4473, GNorm = 1.3610, lr_0 = 2.5797e-04
Loss = 4.1695e-01, PNorm = 61.4455, GNorm = 1.1968, lr_0 = 2.5780e-04
Loss = 3.3925e-01, PNorm = 61.4501, GNorm = 1.2950, lr_0 = 2.5762e-04
Loss = 3.4979e-01, PNorm = 61.4549, GNorm = 1.1420, lr_0 = 2.5745e-04
Loss = 3.8218e-01, PNorm = 61.4559, GNorm = 1.7900, lr_0 = 2.5727e-04
Loss = 3.8300e-01, PNorm = 61.4603, GNorm = 1.0742, lr_0 = 2.5709e-04
Loss = 4.0929e-01, PNorm = 61.4641, GNorm = 1.9759, lr_0 = 2.5692e-04
Loss = 3.8789e-01, PNorm = 61.4685, GNorm = 1.3201, lr_0 = 2.5674e-04
Loss = 3.1594e-01, PNorm = 61.4725, GNorm = 1.0111, lr_0 = 2.5656e-04
Loss = 3.5547e-01, PNorm = 61.4771, GNorm = 1.7127, lr_0 = 2.5639e-04
Loss = 3.8816e-01, PNorm = 61.4808, GNorm = 1.2138, lr_0 = 2.5621e-04
Loss = 3.4228e-01, PNorm = 61.4852, GNorm = 1.1022, lr_0 = 2.5604e-04
Loss = 3.8451e-01, PNorm = 61.4888, GNorm = 1.6550, lr_0 = 2.5586e-04
Loss = 3.2801e-01, PNorm = 61.4889, GNorm = 1.1394, lr_0 = 2.5569e-04
Loss = 3.7929e-01, PNorm = 61.4928, GNorm = 1.3304, lr_0 = 2.5551e-04
Loss = 3.9609e-01, PNorm = 61.4962, GNorm = 2.5501, lr_0 = 2.5534e-04
Loss = 3.9506e-01, PNorm = 61.5021, GNorm = 1.3222, lr_0 = 2.5516e-04
Loss = 3.5716e-01, PNorm = 61.5073, GNorm = 1.7170, lr_0 = 2.5499e-04
Loss = 3.5878e-01, PNorm = 61.5091, GNorm = 1.6488, lr_0 = 2.5481e-04
Loss = 3.9810e-01, PNorm = 61.5092, GNorm = 2.0310, lr_0 = 2.5464e-04
Loss = 3.4504e-01, PNorm = 61.5116, GNorm = 1.6909, lr_0 = 2.5446e-04
Loss = 3.6015e-01, PNorm = 61.5158, GNorm = 1.4004, lr_0 = 2.5429e-04
Loss = 3.3525e-01, PNorm = 61.5208, GNorm = 2.5650, lr_0 = 2.5411e-04
Loss = 3.6823e-01, PNorm = 61.5245, GNorm = 1.2372, lr_0 = 2.5394e-04
Loss = 4.0400e-01, PNorm = 61.5289, GNorm = 1.1912, lr_0 = 2.5377e-04
Loss = 3.4743e-01, PNorm = 61.5308, GNorm = 1.1679, lr_0 = 2.5359e-04
Loss = 3.9308e-01, PNorm = 61.5320, GNorm = 2.8371, lr_0 = 2.5342e-04
Loss = 3.3941e-01, PNorm = 61.5389, GNorm = 1.2707, lr_0 = 2.5325e-04
Loss = 3.7138e-01, PNorm = 61.5402, GNorm = 1.9230, lr_0 = 2.5307e-04
Loss = 3.6115e-01, PNorm = 61.5457, GNorm = 1.4885, lr_0 = 2.5290e-04
Loss = 3.4544e-01, PNorm = 61.5492, GNorm = 1.0966, lr_0 = 2.5273e-04
Loss = 4.0993e-01, PNorm = 61.5551, GNorm = 2.0272, lr_0 = 2.5255e-04
Loss = 3.4883e-01, PNorm = 61.5579, GNorm = 2.1333, lr_0 = 2.5238e-04
Loss = 3.9552e-01, PNorm = 61.5612, GNorm = 1.5109, lr_0 = 2.5221e-04
Loss = 3.8697e-01, PNorm = 61.5675, GNorm = 1.3233, lr_0 = 2.5203e-04
Loss = 3.3869e-01, PNorm = 61.5709, GNorm = 1.2225, lr_0 = 2.5186e-04
Loss = 3.1008e-01, PNorm = 61.5704, GNorm = 1.4065, lr_0 = 2.5169e-04
Loss = 3.8459e-01, PNorm = 61.5681, GNorm = 2.0668, lr_0 = 2.5152e-04
Loss = 3.8733e-01, PNorm = 61.5685, GNorm = 1.1580, lr_0 = 2.5134e-04
Loss = 3.6100e-01, PNorm = 61.5691, GNorm = 1.6843, lr_0 = 2.5117e-04
Loss = 4.6463e-01, PNorm = 61.5727, GNorm = 1.9422, lr_0 = 2.5100e-04
Loss = 3.6540e-01, PNorm = 61.5797, GNorm = 2.0260, lr_0 = 2.5083e-04
Loss = 3.7869e-01, PNorm = 61.5804, GNorm = 1.3287, lr_0 = 2.5066e-04
Loss = 4.1460e-01, PNorm = 61.5821, GNorm = 2.5255, lr_0 = 2.5048e-04
Loss = 3.3198e-01, PNorm = 61.5865, GNorm = 1.6763, lr_0 = 2.5031e-04
Loss = 3.3268e-01, PNorm = 61.5869, GNorm = 1.4391, lr_0 = 2.5014e-04
Loss = 3.6500e-01, PNorm = 61.5916, GNorm = 1.8581, lr_0 = 2.4997e-04
Loss = 3.5474e-01, PNorm = 61.5960, GNorm = 1.3415, lr_0 = 2.4980e-04
Loss = 3.4485e-01, PNorm = 61.5994, GNorm = 1.5745, lr_0 = 2.4963e-04
Loss = 4.1297e-01, PNorm = 61.6013, GNorm = 1.4474, lr_0 = 2.4946e-04
Loss = 3.2004e-01, PNorm = 61.6072, GNorm = 1.0971, lr_0 = 2.4929e-04
Loss = 3.9230e-01, PNorm = 61.6083, GNorm = 1.4964, lr_0 = 2.4911e-04
Loss = 3.9290e-01, PNorm = 61.6084, GNorm = 1.2937, lr_0 = 2.4894e-04
Loss = 4.1638e-01, PNorm = 61.6116, GNorm = 2.0737, lr_0 = 2.4877e-04
Loss = 3.8953e-01, PNorm = 61.6135, GNorm = 1.1952, lr_0 = 2.4860e-04
Loss = 3.8817e-01, PNorm = 61.6145, GNorm = 1.3335, lr_0 = 2.4843e-04
Loss = 3.8093e-01, PNorm = 61.6170, GNorm = 2.0180, lr_0 = 2.4826e-04
Loss = 3.4827e-01, PNorm = 61.6197, GNorm = 1.4422, lr_0 = 2.4809e-04
Loss = 3.4573e-01, PNorm = 61.6229, GNorm = 1.6012, lr_0 = 2.4792e-04
Loss = 3.9938e-01, PNorm = 61.6266, GNorm = 1.5943, lr_0 = 2.4775e-04
Loss = 3.6851e-01, PNorm = 61.6285, GNorm = 1.2088, lr_0 = 2.4758e-04
Loss = 3.4945e-01, PNorm = 61.6288, GNorm = 0.9971, lr_0 = 2.4741e-04
Loss = 3.6171e-01, PNorm = 61.6280, GNorm = 1.2071, lr_0 = 2.4724e-04
Loss = 3.4434e-01, PNorm = 61.6261, GNorm = 1.7081, lr_0 = 2.4707e-04
Validation mae = 0.112721
Epoch 19
Loss = 3.3389e-01, PNorm = 61.6309, GNorm = 1.3083, lr_0 = 2.4690e-04
Loss = 3.3601e-01, PNorm = 61.6319, GNorm = 1.4303, lr_0 = 2.4674e-04
Loss = 3.5718e-01, PNorm = 61.6345, GNorm = 1.3095, lr_0 = 2.4657e-04
Loss = 3.5577e-01, PNorm = 61.6378, GNorm = 1.4874, lr_0 = 2.4640e-04
Loss = 3.7175e-01, PNorm = 61.6414, GNorm = 1.2747, lr_0 = 2.4623e-04
Loss = 3.3660e-01, PNorm = 61.6454, GNorm = 0.8338, lr_0 = 2.4606e-04
Loss = 3.0998e-01, PNorm = 61.6474, GNorm = 1.7618, lr_0 = 2.4589e-04
Loss = 3.5062e-01, PNorm = 61.6491, GNorm = 2.0582, lr_0 = 2.4572e-04
Loss = 3.2222e-01, PNorm = 61.6518, GNorm = 0.9770, lr_0 = 2.4556e-04
Loss = 4.0682e-01, PNorm = 61.6535, GNorm = 0.9605, lr_0 = 2.4539e-04
Loss = 3.1452e-01, PNorm = 61.6565, GNorm = 1.4372, lr_0 = 2.4522e-04
Loss = 3.6936e-01, PNorm = 61.6592, GNorm = 1.4619, lr_0 = 2.4505e-04
Loss = 4.0823e-01, PNorm = 61.6622, GNorm = 1.2892, lr_0 = 2.4488e-04
Loss = 4.2435e-01, PNorm = 61.6637, GNorm = 1.9996, lr_0 = 2.4472e-04
Loss = 3.4263e-01, PNorm = 61.6667, GNorm = 1.1650, lr_0 = 2.4455e-04
Loss = 3.3472e-01, PNorm = 61.6682, GNorm = 1.8651, lr_0 = 2.4438e-04
Loss = 3.6694e-01, PNorm = 61.6720, GNorm = 1.5816, lr_0 = 2.4421e-04
Loss = 3.7569e-01, PNorm = 61.6775, GNorm = 1.3650, lr_0 = 2.4405e-04
Loss = 4.4301e-01, PNorm = 61.6840, GNorm = 1.5437, lr_0 = 2.4388e-04
Loss = 3.7763e-01, PNorm = 61.6871, GNorm = 1.1138, lr_0 = 2.4371e-04
Loss = 3.3672e-01, PNorm = 61.6858, GNorm = 1.5122, lr_0 = 2.4354e-04
Loss = 3.2077e-01, PNorm = 61.6892, GNorm = 1.5222, lr_0 = 2.4338e-04
Loss = 3.8797e-01, PNorm = 61.6917, GNorm = 2.1429, lr_0 = 2.4321e-04
Loss = 3.5156e-01, PNorm = 61.6953, GNorm = 1.0766, lr_0 = 2.4304e-04
Loss = 3.1905e-01, PNorm = 61.6993, GNorm = 1.5440, lr_0 = 2.4288e-04
Loss = 3.6754e-01, PNorm = 61.6984, GNorm = 1.3942, lr_0 = 2.4271e-04
Loss = 3.9486e-01, PNorm = 61.6990, GNorm = 1.7541, lr_0 = 2.4254e-04
Loss = 3.8873e-01, PNorm = 61.7048, GNorm = 1.4431, lr_0 = 2.4238e-04
Loss = 3.8629e-01, PNorm = 61.7063, GNorm = 1.4906, lr_0 = 2.4221e-04
Loss = 3.9564e-01, PNorm = 61.7105, GNorm = 1.4518, lr_0 = 2.4205e-04
Loss = 3.5428e-01, PNorm = 61.7150, GNorm = 1.9775, lr_0 = 2.4188e-04
Loss = 3.3378e-01, PNorm = 61.7178, GNorm = 1.5705, lr_0 = 2.4171e-04
Loss = 4.1590e-01, PNorm = 61.7259, GNorm = 1.6347, lr_0 = 2.4155e-04
Loss = 4.4012e-01, PNorm = 61.7270, GNorm = 2.2929, lr_0 = 2.4138e-04
Loss = 3.8182e-01, PNorm = 61.7281, GNorm = 1.7616, lr_0 = 2.4122e-04
Loss = 3.5392e-01, PNorm = 61.7341, GNorm = 1.5892, lr_0 = 2.4105e-04
Loss = 3.6046e-01, PNorm = 61.7352, GNorm = 1.2941, lr_0 = 2.4089e-04
Loss = 3.2285e-01, PNorm = 61.7374, GNorm = 1.7129, lr_0 = 2.4072e-04
Loss = 3.6481e-01, PNorm = 61.7394, GNorm = 1.4316, lr_0 = 2.4056e-04
Loss = 3.3471e-01, PNorm = 61.7415, GNorm = 1.3621, lr_0 = 2.4039e-04
Loss = 4.0048e-01, PNorm = 61.7462, GNorm = 1.7131, lr_0 = 2.4023e-04
Loss = 4.0226e-01, PNorm = 61.7476, GNorm = 1.7076, lr_0 = 2.4006e-04
Loss = 3.7157e-01, PNorm = 61.7500, GNorm = 0.8362, lr_0 = 2.3990e-04
Loss = 3.7848e-01, PNorm = 61.7499, GNorm = 1.2497, lr_0 = 2.3974e-04
Loss = 3.3512e-01, PNorm = 61.7510, GNorm = 1.4563, lr_0 = 2.3957e-04
Loss = 4.1171e-01, PNorm = 61.7519, GNorm = 2.0141, lr_0 = 2.3941e-04
Loss = 4.0439e-01, PNorm = 61.7574, GNorm = 2.0202, lr_0 = 2.3924e-04
Loss = 3.9525e-01, PNorm = 61.7586, GNorm = 1.4447, lr_0 = 2.3908e-04
Loss = 3.9042e-01, PNorm = 61.7655, GNorm = 1.1695, lr_0 = 2.3892e-04
Loss = 3.7495e-01, PNorm = 61.7683, GNorm = 1.3276, lr_0 = 2.3875e-04
Loss = 3.8831e-01, PNorm = 61.7689, GNorm = 1.4538, lr_0 = 2.3859e-04
Loss = 3.6956e-01, PNorm = 61.7737, GNorm = 1.3884, lr_0 = 2.3842e-04
Loss = 3.9299e-01, PNorm = 61.7782, GNorm = 1.3639, lr_0 = 2.3826e-04
Loss = 3.4482e-01, PNorm = 61.7798, GNorm = 1.2501, lr_0 = 2.3810e-04
Loss = 3.2142e-01, PNorm = 61.7808, GNorm = 1.4644, lr_0 = 2.3794e-04
Loss = 3.6164e-01, PNorm = 61.7860, GNorm = 1.2642, lr_0 = 2.3777e-04
Loss = 3.4347e-01, PNorm = 61.7908, GNorm = 1.6133, lr_0 = 2.3761e-04
Loss = 3.6662e-01, PNorm = 61.7923, GNorm = 2.0351, lr_0 = 2.3745e-04
Loss = 4.4407e-01, PNorm = 61.7956, GNorm = 1.6580, lr_0 = 2.3728e-04
Loss = 4.0016e-01, PNorm = 61.7976, GNorm = 1.6950, lr_0 = 2.3712e-04
Loss = 3.6062e-01, PNorm = 61.7982, GNorm = 1.8731, lr_0 = 2.3696e-04
Loss = 4.3236e-01, PNorm = 61.8000, GNorm = 1.7092, lr_0 = 2.3680e-04
Loss = 3.9508e-01, PNorm = 61.8032, GNorm = 1.8811, lr_0 = 2.3663e-04
Loss = 3.0357e-01, PNorm = 61.8045, GNorm = 1.3124, lr_0 = 2.3647e-04
Loss = 3.7105e-01, PNorm = 61.8048, GNorm = 1.7407, lr_0 = 2.3631e-04
Loss = 3.8452e-01, PNorm = 61.8070, GNorm = 1.8129, lr_0 = 2.3615e-04
Loss = 3.9349e-01, PNorm = 61.8106, GNorm = 1.4909, lr_0 = 2.3599e-04
Loss = 3.7479e-01, PNorm = 61.8142, GNorm = 1.3345, lr_0 = 2.3582e-04
Loss = 3.2905e-01, PNorm = 61.8178, GNorm = 1.1065, lr_0 = 2.3566e-04
Loss = 3.6266e-01, PNorm = 61.8200, GNorm = 1.2070, lr_0 = 2.3550e-04
Loss = 3.6599e-01, PNorm = 61.8220, GNorm = 2.1638, lr_0 = 2.3534e-04
Loss = 3.1058e-01, PNorm = 61.8230, GNorm = 1.3532, lr_0 = 2.3518e-04
Loss = 3.8670e-01, PNorm = 61.8238, GNorm = 1.4094, lr_0 = 2.3502e-04
Loss = 3.6734e-01, PNorm = 61.8269, GNorm = 1.1471, lr_0 = 2.3486e-04
Loss = 4.0217e-01, PNorm = 61.8311, GNorm = 1.4645, lr_0 = 2.3470e-04
Loss = 3.5751e-01, PNorm = 61.8335, GNorm = 1.5085, lr_0 = 2.3454e-04
Loss = 3.5064e-01, PNorm = 61.8359, GNorm = 1.3843, lr_0 = 2.3437e-04
Loss = 3.9409e-01, PNorm = 61.8397, GNorm = 1.5701, lr_0 = 2.3421e-04
Loss = 3.2768e-01, PNorm = 61.8445, GNorm = 1.4567, lr_0 = 2.3405e-04
Loss = 3.1013e-01, PNorm = 61.8494, GNorm = 1.5403, lr_0 = 2.3389e-04
Loss = 3.5932e-01, PNorm = 61.8522, GNorm = 0.9555, lr_0 = 2.3373e-04
Loss = 3.5201e-01, PNorm = 61.8579, GNorm = 1.2136, lr_0 = 2.3357e-04
Loss = 3.9432e-01, PNorm = 61.8585, GNorm = 1.5246, lr_0 = 2.3341e-04
Loss = 3.9063e-01, PNorm = 61.8613, GNorm = 1.9439, lr_0 = 2.3325e-04
Loss = 3.6768e-01, PNorm = 61.8654, GNorm = 1.5419, lr_0 = 2.3309e-04
Loss = 3.7518e-01, PNorm = 61.8682, GNorm = 1.0346, lr_0 = 2.3293e-04
Loss = 3.6682e-01, PNorm = 61.8713, GNorm = 1.1519, lr_0 = 2.3277e-04
Loss = 3.4258e-01, PNorm = 61.8733, GNorm = 1.5560, lr_0 = 2.3261e-04
Loss = 3.5692e-01, PNorm = 61.8718, GNorm = 2.1278, lr_0 = 2.3246e-04
Loss = 3.4667e-01, PNorm = 61.8736, GNorm = 1.5400, lr_0 = 2.3230e-04
Loss = 3.2546e-01, PNorm = 61.8752, GNorm = 1.2379, lr_0 = 2.3214e-04
Loss = 3.6022e-01, PNorm = 61.8749, GNorm = 1.5561, lr_0 = 2.3198e-04
Loss = 3.4773e-01, PNorm = 61.8800, GNorm = 1.4284, lr_0 = 2.3182e-04
Loss = 3.5985e-01, PNorm = 61.8835, GNorm = 1.4973, lr_0 = 2.3166e-04
Loss = 3.7814e-01, PNorm = 61.8832, GNorm = 1.1663, lr_0 = 2.3150e-04
Loss = 3.5779e-01, PNorm = 61.8853, GNorm = 1.2896, lr_0 = 2.3134e-04
Loss = 3.8876e-01, PNorm = 61.8866, GNorm = 1.4647, lr_0 = 2.3118e-04
Loss = 3.2402e-01, PNorm = 61.8905, GNorm = 1.1073, lr_0 = 2.3103e-04
Loss = 3.7973e-01, PNorm = 61.8922, GNorm = 1.3864, lr_0 = 2.3087e-04
Loss = 4.3905e-01, PNorm = 61.8948, GNorm = 1.6870, lr_0 = 2.3071e-04
Loss = 3.6916e-01, PNorm = 61.8990, GNorm = 1.2094, lr_0 = 2.3055e-04
Loss = 3.7325e-01, PNorm = 61.8987, GNorm = 1.5996, lr_0 = 2.3039e-04
Loss = 3.6485e-01, PNorm = 61.9033, GNorm = 1.5087, lr_0 = 2.3024e-04
Loss = 3.8243e-01, PNorm = 61.9020, GNorm = 1.7765, lr_0 = 2.3008e-04
Loss = 3.6602e-01, PNorm = 61.9055, GNorm = 1.6296, lr_0 = 2.2992e-04
Loss = 4.0741e-01, PNorm = 61.9047, GNorm = 1.7199, lr_0 = 2.2976e-04
Loss = 3.4890e-01, PNorm = 61.9093, GNorm = 1.5716, lr_0 = 2.2961e-04
Loss = 4.0546e-01, PNorm = 61.9135, GNorm = 1.7552, lr_0 = 2.2945e-04
Loss = 3.5062e-01, PNorm = 61.9157, GNorm = 2.4342, lr_0 = 2.2929e-04
Loss = 3.3240e-01, PNorm = 61.9193, GNorm = 1.1258, lr_0 = 2.2913e-04
Loss = 3.5703e-01, PNorm = 61.9222, GNorm = 1.9893, lr_0 = 2.2898e-04
Loss = 2.9821e-01, PNorm = 61.9231, GNorm = 1.0409, lr_0 = 2.2882e-04
Loss = 3.7314e-01, PNorm = 61.9248, GNorm = 1.8310, lr_0 = 2.2866e-04
Loss = 3.6668e-01, PNorm = 61.9266, GNorm = 1.6889, lr_0 = 2.2851e-04
Loss = 3.5267e-01, PNorm = 61.9285, GNorm = 1.5686, lr_0 = 2.2835e-04
Loss = 3.7334e-01, PNorm = 61.9309, GNorm = 1.3223, lr_0 = 2.2819e-04
Loss = 3.4827e-01, PNorm = 61.9322, GNorm = 2.4807, lr_0 = 2.2804e-04
Loss = 3.8629e-01, PNorm = 61.9368, GNorm = 1.5229, lr_0 = 2.2788e-04
Loss = 3.4005e-01, PNorm = 61.9377, GNorm = 1.1075, lr_0 = 2.2773e-04
Loss = 3.4988e-01, PNorm = 61.9408, GNorm = 1.9493, lr_0 = 2.2757e-04
Validation mae = 0.113295
Epoch 20
Loss = 3.3878e-01, PNorm = 61.9452, GNorm = 1.4017, lr_0 = 2.2741e-04
Loss = 4.1196e-01, PNorm = 61.9476, GNorm = 1.0451, lr_0 = 2.2726e-04
Loss = 3.4574e-01, PNorm = 61.9529, GNorm = 1.1901, lr_0 = 2.2710e-04
Loss = 3.6009e-01, PNorm = 61.9556, GNorm = 1.6973, lr_0 = 2.2695e-04
Loss = 3.1018e-01, PNorm = 61.9590, GNorm = 1.4063, lr_0 = 2.2679e-04
Loss = 2.7975e-01, PNorm = 61.9626, GNorm = 1.5602, lr_0 = 2.2664e-04
Loss = 3.6318e-01, PNorm = 61.9665, GNorm = 1.4020, lr_0 = 2.2648e-04
Loss = 3.3630e-01, PNorm = 61.9692, GNorm = 1.1012, lr_0 = 2.2632e-04
Loss = 2.9000e-01, PNorm = 61.9693, GNorm = 1.2401, lr_0 = 2.2617e-04
Loss = 3.6332e-01, PNorm = 61.9690, GNorm = 1.1979, lr_0 = 2.2601e-04
Loss = 3.4332e-01, PNorm = 61.9733, GNorm = 1.7085, lr_0 = 2.2586e-04
Loss = 3.8460e-01, PNorm = 61.9763, GNorm = 1.9327, lr_0 = 2.2571e-04
Loss = 3.5726e-01, PNorm = 61.9792, GNorm = 1.2278, lr_0 = 2.2555e-04
Loss = 3.7749e-01, PNorm = 61.9796, GNorm = 1.4402, lr_0 = 2.2540e-04
Loss = 3.8158e-01, PNorm = 61.9824, GNorm = 1.2455, lr_0 = 2.2524e-04
Loss = 3.3875e-01, PNorm = 61.9854, GNorm = 1.1800, lr_0 = 2.2509e-04
Loss = 3.3410e-01, PNorm = 61.9882, GNorm = 1.2332, lr_0 = 2.2493e-04
Loss = 3.2297e-01, PNorm = 61.9898, GNorm = 1.9503, lr_0 = 2.2478e-04
Loss = 3.9041e-01, PNorm = 61.9915, GNorm = 1.8381, lr_0 = 2.2463e-04
Loss = 3.4522e-01, PNorm = 61.9912, GNorm = 1.2384, lr_0 = 2.2447e-04
Loss = 3.5100e-01, PNorm = 61.9913, GNorm = 2.0576, lr_0 = 2.2432e-04
Loss = 3.4539e-01, PNorm = 61.9922, GNorm = 1.6671, lr_0 = 2.2416e-04
Loss = 3.5752e-01, PNorm = 61.9929, GNorm = 1.3311, lr_0 = 2.2401e-04
Loss = 3.4214e-01, PNorm = 61.9936, GNorm = 1.4315, lr_0 = 2.2386e-04
Loss = 3.7456e-01, PNorm = 61.9958, GNorm = 1.4450, lr_0 = 2.2370e-04
Loss = 3.6144e-01, PNorm = 61.9958, GNorm = 1.4261, lr_0 = 2.2355e-04
Loss = 3.3022e-01, PNorm = 61.9949, GNorm = 1.1722, lr_0 = 2.2340e-04
Loss = 3.5121e-01, PNorm = 61.9987, GNorm = 1.5738, lr_0 = 2.2324e-04
Loss = 3.6120e-01, PNorm = 62.0026, GNorm = 1.7726, lr_0 = 2.2309e-04
Loss = 4.0120e-01, PNorm = 62.0038, GNorm = 1.4183, lr_0 = 2.2294e-04
Loss = 3.9567e-01, PNorm = 62.0079, GNorm = 0.9591, lr_0 = 2.2279e-04
Loss = 3.7087e-01, PNorm = 62.0110, GNorm = 1.7738, lr_0 = 2.2263e-04
Loss = 3.8104e-01, PNorm = 62.0150, GNorm = 1.2200, lr_0 = 2.2248e-04
Loss = 3.5546e-01, PNorm = 62.0143, GNorm = 1.2997, lr_0 = 2.2233e-04
Loss = 3.5152e-01, PNorm = 62.0173, GNorm = 1.1426, lr_0 = 2.2218e-04
Loss = 3.5668e-01, PNorm = 62.0219, GNorm = 1.4687, lr_0 = 2.2202e-04
Loss = 3.7318e-01, PNorm = 62.0246, GNorm = 1.5740, lr_0 = 2.2187e-04
Loss = 3.5391e-01, PNorm = 62.0292, GNorm = 1.2551, lr_0 = 2.2172e-04
Loss = 3.7777e-01, PNorm = 62.0327, GNorm = 1.4027, lr_0 = 2.2157e-04
Loss = 3.5027e-01, PNorm = 62.0358, GNorm = 1.9406, lr_0 = 2.2142e-04
Loss = 3.7775e-01, PNorm = 62.0367, GNorm = 0.9361, lr_0 = 2.2126e-04
Loss = 3.5632e-01, PNorm = 62.0423, GNorm = 1.4395, lr_0 = 2.2111e-04
Loss = 4.1108e-01, PNorm = 62.0479, GNorm = 2.6679, lr_0 = 2.2096e-04
Loss = 3.9838e-01, PNorm = 62.0491, GNorm = 1.7429, lr_0 = 2.2081e-04
Loss = 3.8204e-01, PNorm = 62.0508, GNorm = 1.4266, lr_0 = 2.2066e-04
Loss = 3.3621e-01, PNorm = 62.0529, GNorm = 1.5305, lr_0 = 2.2051e-04
Loss = 3.4881e-01, PNorm = 62.0562, GNorm = 1.1226, lr_0 = 2.2036e-04
Loss = 3.8729e-01, PNorm = 62.0600, GNorm = 1.2957, lr_0 = 2.2021e-04
Loss = 3.3551e-01, PNorm = 62.0628, GNorm = 1.1343, lr_0 = 2.2005e-04
Loss = 3.6720e-01, PNorm = 62.0645, GNorm = 1.5522, lr_0 = 2.1990e-04
Loss = 3.8347e-01, PNorm = 62.0675, GNorm = 1.1376, lr_0 = 2.1975e-04
Loss = 3.8666e-01, PNorm = 62.0717, GNorm = 1.2543, lr_0 = 2.1960e-04
Loss = 3.5721e-01, PNorm = 62.0732, GNorm = 1.6765, lr_0 = 2.1945e-04
Loss = 3.9114e-01, PNorm = 62.0755, GNorm = 1.4070, lr_0 = 2.1930e-04
Loss = 3.6871e-01, PNorm = 62.0754, GNorm = 1.2511, lr_0 = 2.1915e-04
Loss = 4.0643e-01, PNorm = 62.0796, GNorm = 1.5053, lr_0 = 2.1900e-04
Loss = 3.1492e-01, PNorm = 62.0802, GNorm = 1.9815, lr_0 = 2.1885e-04
Loss = 4.0784e-01, PNorm = 62.0818, GNorm = 2.2785, lr_0 = 2.1870e-04
Loss = 3.7635e-01, PNorm = 62.0840, GNorm = 1.6287, lr_0 = 2.1855e-04
Loss = 4.2971e-01, PNorm = 62.0858, GNorm = 1.3072, lr_0 = 2.1840e-04
Loss = 3.1607e-01, PNorm = 62.0872, GNorm = 1.3821, lr_0 = 2.1825e-04
Loss = 3.3644e-01, PNorm = 62.0917, GNorm = 1.1652, lr_0 = 2.1810e-04
Loss = 3.3948e-01, PNorm = 62.0919, GNorm = 2.3768, lr_0 = 2.1795e-04
Loss = 3.4989e-01, PNorm = 62.0931, GNorm = 1.5827, lr_0 = 2.1780e-04
Loss = 4.1688e-01, PNorm = 62.0963, GNorm = 1.3297, lr_0 = 2.1765e-04
Loss = 3.8036e-01, PNorm = 62.0975, GNorm = 1.2387, lr_0 = 2.1751e-04
Loss = 3.3324e-01, PNorm = 62.0981, GNorm = 1.2451, lr_0 = 2.1736e-04
Loss = 4.2892e-01, PNorm = 62.0988, GNorm = 1.7752, lr_0 = 2.1721e-04
Loss = 3.6899e-01, PNorm = 62.1013, GNorm = 1.4212, lr_0 = 2.1706e-04
Loss = 3.6245e-01, PNorm = 62.1046, GNorm = 1.5821, lr_0 = 2.1691e-04
Loss = 3.6444e-01, PNorm = 62.1056, GNorm = 1.2234, lr_0 = 2.1676e-04
Loss = 3.6776e-01, PNorm = 62.1090, GNorm = 1.1943, lr_0 = 2.1661e-04
Loss = 3.7504e-01, PNorm = 62.1091, GNorm = 1.3598, lr_0 = 2.1646e-04
Loss = 3.9826e-01, PNorm = 62.1081, GNorm = 1.3947, lr_0 = 2.1632e-04
Loss = 3.6048e-01, PNorm = 62.1102, GNorm = 1.6627, lr_0 = 2.1617e-04
Loss = 3.7532e-01, PNorm = 62.1134, GNorm = 1.0875, lr_0 = 2.1602e-04
Loss = 3.6557e-01, PNorm = 62.1166, GNorm = 1.7747, lr_0 = 2.1587e-04
Loss = 3.7221e-01, PNorm = 62.1188, GNorm = 1.5908, lr_0 = 2.1572e-04
Loss = 3.9626e-01, PNorm = 62.1187, GNorm = 1.6603, lr_0 = 2.1558e-04
Loss = 3.1194e-01, PNorm = 62.1228, GNorm = 1.5144, lr_0 = 2.1543e-04
Loss = 3.5253e-01, PNorm = 62.1248, GNorm = 1.0322, lr_0 = 2.1528e-04
Loss = 3.5570e-01, PNorm = 62.1282, GNorm = 1.7707, lr_0 = 2.1513e-04
Loss = 3.7230e-01, PNorm = 62.1298, GNorm = 1.7278, lr_0 = 2.1499e-04
Loss = 3.5102e-01, PNorm = 62.1304, GNorm = 1.2696, lr_0 = 2.1484e-04
Loss = 3.5840e-01, PNorm = 62.1325, GNorm = 1.5742, lr_0 = 2.1469e-04
Loss = 3.6445e-01, PNorm = 62.1376, GNorm = 1.7375, lr_0 = 2.1454e-04
Loss = 4.2739e-01, PNorm = 62.1401, GNorm = 1.4697, lr_0 = 2.1440e-04
Loss = 4.1976e-01, PNorm = 62.1420, GNorm = 2.0341, lr_0 = 2.1425e-04
Loss = 3.3251e-01, PNorm = 62.1410, GNorm = 1.1025, lr_0 = 2.1410e-04
Loss = 2.8306e-01, PNorm = 62.1405, GNorm = 1.2096, lr_0 = 2.1396e-04
Loss = 3.4133e-01, PNorm = 62.1443, GNorm = 1.3397, lr_0 = 2.1381e-04
Loss = 4.0670e-01, PNorm = 62.1473, GNorm = 1.1843, lr_0 = 2.1366e-04
Loss = 3.3395e-01, PNorm = 62.1499, GNorm = 1.4521, lr_0 = 2.1352e-04
Loss = 3.8334e-01, PNorm = 62.1553, GNorm = 2.3096, lr_0 = 2.1337e-04
Loss = 3.7916e-01, PNorm = 62.1563, GNorm = 1.6142, lr_0 = 2.1323e-04
Loss = 3.1368e-01, PNorm = 62.1604, GNorm = 1.2926, lr_0 = 2.1308e-04
Loss = 3.4087e-01, PNorm = 62.1624, GNorm = 1.0988, lr_0 = 2.1293e-04
Loss = 3.4759e-01, PNorm = 62.1625, GNorm = 1.5373, lr_0 = 2.1279e-04
Loss = 3.9505e-01, PNorm = 62.1667, GNorm = 1.8728, lr_0 = 2.1264e-04
Loss = 3.6830e-01, PNorm = 62.1714, GNorm = 1.7258, lr_0 = 2.1250e-04
Loss = 3.3780e-01, PNorm = 62.1761, GNorm = 1.3251, lr_0 = 2.1235e-04
Loss = 3.5497e-01, PNorm = 62.1782, GNorm = 1.2813, lr_0 = 2.1221e-04
Loss = 3.7349e-01, PNorm = 62.1788, GNorm = 1.5468, lr_0 = 2.1206e-04
Loss = 3.4983e-01, PNorm = 62.1833, GNorm = 1.2676, lr_0 = 2.1191e-04
Loss = 3.7717e-01, PNorm = 62.1886, GNorm = 1.7535, lr_0 = 2.1177e-04
Loss = 3.3171e-01, PNorm = 62.1909, GNorm = 1.4001, lr_0 = 2.1162e-04
Loss = 3.5031e-01, PNorm = 62.1940, GNorm = 2.0296, lr_0 = 2.1148e-04
Loss = 3.9916e-01, PNorm = 62.1936, GNorm = 1.6726, lr_0 = 2.1133e-04
Loss = 3.9418e-01, PNorm = 62.1955, GNorm = 2.0388, lr_0 = 2.1119e-04
Loss = 3.5886e-01, PNorm = 62.1974, GNorm = 3.4583, lr_0 = 2.1104e-04
Loss = 4.0809e-01, PNorm = 62.2006, GNorm = 1.9687, lr_0 = 2.1090e-04
Loss = 3.3698e-01, PNorm = 62.2048, GNorm = 1.5039, lr_0 = 2.1076e-04
Loss = 3.7701e-01, PNorm = 62.2096, GNorm = 1.2685, lr_0 = 2.1061e-04
Loss = 3.3412e-01, PNorm = 62.2114, GNorm = 1.3750, lr_0 = 2.1047e-04
Loss = 3.8694e-01, PNorm = 62.2158, GNorm = 1.8935, lr_0 = 2.1032e-04
Loss = 3.6815e-01, PNorm = 62.2174, GNorm = 1.1503, lr_0 = 2.1018e-04
Loss = 4.1089e-01, PNorm = 62.2217, GNorm = 1.8288, lr_0 = 2.1003e-04
Loss = 3.6638e-01, PNorm = 62.2258, GNorm = 1.5023, lr_0 = 2.0989e-04
Loss = 3.3732e-01, PNorm = 62.2290, GNorm = 1.9671, lr_0 = 2.0975e-04
Loss = 3.6865e-01, PNorm = 62.2322, GNorm = 2.1290, lr_0 = 2.0960e-04
Validation mae = 0.111939
Epoch 21
Loss = 4.2656e-01, PNorm = 62.2332, GNorm = 1.2315, lr_0 = 2.0946e-04
Loss = 3.4659e-01, PNorm = 62.2360, GNorm = 1.5033, lr_0 = 2.0932e-04
Loss = 4.0728e-01, PNorm = 62.2343, GNorm = 1.0748, lr_0 = 2.0917e-04
Loss = 3.3722e-01, PNorm = 62.2317, GNorm = 1.4628, lr_0 = 2.0903e-04
Loss = 3.5240e-01, PNorm = 62.2350, GNorm = 1.3107, lr_0 = 2.0889e-04
Loss = 3.7075e-01, PNorm = 62.2368, GNorm = 1.6807, lr_0 = 2.0874e-04
Loss = 3.7226e-01, PNorm = 62.2422, GNorm = 1.5554, lr_0 = 2.0860e-04
Loss = 3.0091e-01, PNorm = 62.2447, GNorm = 1.0266, lr_0 = 2.0846e-04
Loss = 3.5622e-01, PNorm = 62.2461, GNorm = 0.8279, lr_0 = 2.0831e-04
Loss = 4.1953e-01, PNorm = 62.2480, GNorm = 1.1381, lr_0 = 2.0817e-04
Loss = 3.9443e-01, PNorm = 62.2525, GNorm = 1.4871, lr_0 = 2.0803e-04
Loss = 3.8569e-01, PNorm = 62.2530, GNorm = 2.6002, lr_0 = 2.0789e-04
Loss = 3.5880e-01, PNorm = 62.2511, GNorm = 1.8342, lr_0 = 2.0774e-04
Loss = 3.5127e-01, PNorm = 62.2559, GNorm = 0.9541, lr_0 = 2.0760e-04
Loss = 3.7319e-01, PNorm = 62.2578, GNorm = 1.3629, lr_0 = 2.0746e-04
Loss = 3.7038e-01, PNorm = 62.2593, GNorm = 1.3191, lr_0 = 2.0732e-04
Loss = 3.6494e-01, PNorm = 62.2622, GNorm = 1.4068, lr_0 = 2.0718e-04
Loss = 3.1922e-01, PNorm = 62.2637, GNorm = 1.2190, lr_0 = 2.0703e-04
Loss = 3.7069e-01, PNorm = 62.2610, GNorm = 1.4017, lr_0 = 2.0689e-04
Loss = 3.1655e-01, PNorm = 62.2620, GNorm = 1.2682, lr_0 = 2.0675e-04
Loss = 3.5284e-01, PNorm = 62.2659, GNorm = 1.7233, lr_0 = 2.0661e-04
Loss = 3.1834e-01, PNorm = 62.2685, GNorm = 1.1336, lr_0 = 2.0647e-04
Loss = 3.3185e-01, PNorm = 62.2714, GNorm = 1.8479, lr_0 = 2.0633e-04
Loss = 3.8725e-01, PNorm = 62.2721, GNorm = 1.2542, lr_0 = 2.0618e-04
Loss = 3.6976e-01, PNorm = 62.2735, GNorm = 1.7811, lr_0 = 2.0604e-04
Loss = 2.8851e-01, PNorm = 62.2764, GNorm = 1.0706, lr_0 = 2.0590e-04
Loss = 3.7047e-01, PNorm = 62.2755, GNorm = 1.3955, lr_0 = 2.0576e-04
Loss = 3.2072e-01, PNorm = 62.2773, GNorm = 1.4932, lr_0 = 2.0562e-04
Loss = 4.0585e-01, PNorm = 62.2783, GNorm = 1.6286, lr_0 = 2.0548e-04
Loss = 3.4006e-01, PNorm = 62.2806, GNorm = 1.0796, lr_0 = 2.0534e-04
Loss = 4.1575e-01, PNorm = 62.2828, GNorm = 1.2601, lr_0 = 2.0520e-04
Loss = 4.0215e-01, PNorm = 62.2835, GNorm = 1.6289, lr_0 = 2.0506e-04
Loss = 3.5305e-01, PNorm = 62.2864, GNorm = 1.6008, lr_0 = 2.0492e-04
Loss = 3.5912e-01, PNorm = 62.2882, GNorm = 1.2217, lr_0 = 2.0478e-04
Loss = 3.1915e-01, PNorm = 62.2903, GNorm = 1.5388, lr_0 = 2.0464e-04
Loss = 3.6131e-01, PNorm = 62.2955, GNorm = 2.0216, lr_0 = 2.0450e-04
Loss = 3.7630e-01, PNorm = 62.2970, GNorm = 2.5510, lr_0 = 2.0436e-04
Loss = 3.4229e-01, PNorm = 62.2971, GNorm = 1.4975, lr_0 = 2.0422e-04
Loss = 3.6843e-01, PNorm = 62.2970, GNorm = 1.4628, lr_0 = 2.0408e-04
Loss = 3.6750e-01, PNorm = 62.2992, GNorm = 1.0550, lr_0 = 2.0394e-04
Loss = 3.4366e-01, PNorm = 62.3032, GNorm = 1.2009, lr_0 = 2.0380e-04
Loss = 3.4730e-01, PNorm = 62.3072, GNorm = 1.4221, lr_0 = 2.0366e-04
Loss = 3.6038e-01, PNorm = 62.3070, GNorm = 1.4930, lr_0 = 2.0352e-04
Loss = 3.4446e-01, PNorm = 62.3103, GNorm = 1.2688, lr_0 = 2.0338e-04
Loss = 3.6326e-01, PNorm = 62.3153, GNorm = 1.0981, lr_0 = 2.0324e-04
Loss = 3.4476e-01, PNorm = 62.3182, GNorm = 1.5846, lr_0 = 2.0310e-04
Loss = 3.7211e-01, PNorm = 62.3182, GNorm = 1.4341, lr_0 = 2.0296e-04
Loss = 3.4679e-01, PNorm = 62.3199, GNorm = 1.0174, lr_0 = 2.0282e-04
Loss = 3.5535e-01, PNorm = 62.3229, GNorm = 1.4169, lr_0 = 2.0268e-04
Loss = 3.7276e-01, PNorm = 62.3249, GNorm = 1.5387, lr_0 = 2.0254e-04
Loss = 3.4710e-01, PNorm = 62.3295, GNorm = 1.3794, lr_0 = 2.0240e-04
Loss = 3.5237e-01, PNorm = 62.3323, GNorm = 1.2098, lr_0 = 2.0227e-04
Loss = 4.0207e-01, PNorm = 62.3307, GNorm = 1.6129, lr_0 = 2.0213e-04
Loss = 3.3353e-01, PNorm = 62.3325, GNorm = 1.4010, lr_0 = 2.0199e-04
Loss = 3.7640e-01, PNorm = 62.3347, GNorm = 1.6046, lr_0 = 2.0185e-04
Loss = 3.2264e-01, PNorm = 62.3365, GNorm = 1.8518, lr_0 = 2.0171e-04
Loss = 3.7134e-01, PNorm = 62.3381, GNorm = 1.2815, lr_0 = 2.0157e-04
Loss = 3.3426e-01, PNorm = 62.3361, GNorm = 1.1837, lr_0 = 2.0144e-04
Loss = 3.0620e-01, PNorm = 62.3374, GNorm = 0.9542, lr_0 = 2.0130e-04
Loss = 3.5011e-01, PNorm = 62.3391, GNorm = 1.1761, lr_0 = 2.0116e-04
Loss = 3.1727e-01, PNorm = 62.3391, GNorm = 1.1938, lr_0 = 2.0102e-04
Loss = 3.7641e-01, PNorm = 62.3417, GNorm = 1.5818, lr_0 = 2.0088e-04
Loss = 3.3398e-01, PNorm = 62.3439, GNorm = 1.9028, lr_0 = 2.0075e-04
Loss = 3.3187e-01, PNorm = 62.3458, GNorm = 1.7444, lr_0 = 2.0061e-04
Loss = 3.3838e-01, PNorm = 62.3477, GNorm = 1.2454, lr_0 = 2.0047e-04
Loss = 3.4779e-01, PNorm = 62.3516, GNorm = 1.2147, lr_0 = 2.0033e-04
Loss = 3.5029e-01, PNorm = 62.3554, GNorm = 1.6835, lr_0 = 2.0020e-04
Loss = 3.1339e-01, PNorm = 62.3581, GNorm = 1.4674, lr_0 = 2.0006e-04
Loss = 3.3917e-01, PNorm = 62.3584, GNorm = 1.1826, lr_0 = 1.9992e-04
Loss = 3.6136e-01, PNorm = 62.3586, GNorm = 1.2648, lr_0 = 1.9979e-04
Loss = 3.6029e-01, PNorm = 62.3596, GNorm = 1.3543, lr_0 = 1.9965e-04
Loss = 3.5076e-01, PNorm = 62.3621, GNorm = 1.3174, lr_0 = 1.9951e-04
Loss = 3.4148e-01, PNorm = 62.3637, GNorm = 1.4900, lr_0 = 1.9938e-04
Loss = 3.5695e-01, PNorm = 62.3656, GNorm = 1.3855, lr_0 = 1.9924e-04
Loss = 3.4485e-01, PNorm = 62.3711, GNorm = 1.8217, lr_0 = 1.9910e-04
Loss = 3.1306e-01, PNorm = 62.3733, GNorm = 1.2523, lr_0 = 1.9897e-04
Loss = 3.4409e-01, PNorm = 62.3756, GNorm = 1.4124, lr_0 = 1.9883e-04
Loss = 3.7952e-01, PNorm = 62.3776, GNorm = 1.6357, lr_0 = 1.9869e-04
Loss = 3.7978e-01, PNorm = 62.3808, GNorm = 1.5118, lr_0 = 1.9856e-04
Loss = 4.0782e-01, PNorm = 62.3846, GNorm = 1.9191, lr_0 = 1.9842e-04
Loss = 3.8542e-01, PNorm = 62.3841, GNorm = 2.5864, lr_0 = 1.9829e-04
Loss = 3.1946e-01, PNorm = 62.3855, GNorm = 1.7415, lr_0 = 1.9815e-04
Loss = 3.6774e-01, PNorm = 62.3897, GNorm = 1.2429, lr_0 = 1.9801e-04
Loss = 3.3188e-01, PNorm = 62.3896, GNorm = 2.4453, lr_0 = 1.9788e-04
Loss = 3.8764e-01, PNorm = 62.3908, GNorm = 1.6044, lr_0 = 1.9774e-04
Loss = 3.7097e-01, PNorm = 62.3937, GNorm = 1.5506, lr_0 = 1.9761e-04
Loss = 3.4816e-01, PNorm = 62.3938, GNorm = 1.0118, lr_0 = 1.9747e-04
Loss = 4.0559e-01, PNorm = 62.3947, GNorm = 1.2431, lr_0 = 1.9734e-04
Loss = 3.7006e-01, PNorm = 62.3972, GNorm = 1.3352, lr_0 = 1.9720e-04
Loss = 3.7489e-01, PNorm = 62.4008, GNorm = 1.6532, lr_0 = 1.9707e-04
Loss = 3.7119e-01, PNorm = 62.4006, GNorm = 1.2950, lr_0 = 1.9693e-04
Loss = 3.9019e-01, PNorm = 62.4018, GNorm = 2.1810, lr_0 = 1.9680e-04
Loss = 3.9814e-01, PNorm = 62.4030, GNorm = 1.2094, lr_0 = 1.9666e-04
Loss = 3.6009e-01, PNorm = 62.4061, GNorm = 1.3122, lr_0 = 1.9653e-04
Loss = 3.5921e-01, PNorm = 62.4099, GNorm = 1.2631, lr_0 = 1.9639e-04
Loss = 3.2307e-01, PNorm = 62.4094, GNorm = 1.2665, lr_0 = 1.9626e-04
Loss = 3.4858e-01, PNorm = 62.4094, GNorm = 1.2713, lr_0 = 1.9612e-04
Loss = 3.4562e-01, PNorm = 62.4110, GNorm = 1.4487, lr_0 = 1.9599e-04
Loss = 4.0325e-01, PNorm = 62.4144, GNorm = 2.1835, lr_0 = 1.9585e-04
Loss = 3.7515e-01, PNorm = 62.4157, GNorm = 1.2502, lr_0 = 1.9572e-04
Loss = 3.9769e-01, PNorm = 62.4189, GNorm = 2.0753, lr_0 = 1.9559e-04
Loss = 4.2812e-01, PNorm = 62.4215, GNorm = 1.5178, lr_0 = 1.9545e-04
Loss = 3.8779e-01, PNorm = 62.4217, GNorm = 1.4615, lr_0 = 1.9532e-04
Loss = 3.5329e-01, PNorm = 62.4237, GNorm = 1.9738, lr_0 = 1.9518e-04
Loss = 3.6076e-01, PNorm = 62.4244, GNorm = 1.7979, lr_0 = 1.9505e-04
Loss = 3.9020e-01, PNorm = 62.4292, GNorm = 1.4578, lr_0 = 1.9492e-04
Loss = 3.0973e-01, PNorm = 62.4328, GNorm = 1.3680, lr_0 = 1.9478e-04
Loss = 4.2447e-01, PNorm = 62.4338, GNorm = 1.5188, lr_0 = 1.9465e-04
Loss = 3.1168e-01, PNorm = 62.4373, GNorm = 1.6471, lr_0 = 1.9452e-04
Loss = 3.7057e-01, PNorm = 62.4370, GNorm = 1.0132, lr_0 = 1.9438e-04
Loss = 3.6825e-01, PNorm = 62.4404, GNorm = 1.5836, lr_0 = 1.9425e-04
Loss = 3.5797e-01, PNorm = 62.4426, GNorm = 1.6121, lr_0 = 1.9412e-04
Loss = 3.7261e-01, PNorm = 62.4427, GNorm = 1.8245, lr_0 = 1.9398e-04
Loss = 3.3582e-01, PNorm = 62.4453, GNorm = 1.1249, lr_0 = 1.9385e-04
Loss = 4.1593e-01, PNorm = 62.4467, GNorm = 2.0940, lr_0 = 1.9372e-04
Loss = 3.6853e-01, PNorm = 62.4500, GNorm = 1.4225, lr_0 = 1.9359e-04
Loss = 3.6787e-01, PNorm = 62.4535, GNorm = 1.4361, lr_0 = 1.9345e-04
Loss = 3.7172e-01, PNorm = 62.4599, GNorm = 1.6343, lr_0 = 1.9332e-04
Loss = 3.6233e-01, PNorm = 62.4633, GNorm = 1.4365, lr_0 = 1.9319e-04
Loss = 3.9609e-01, PNorm = 62.4638, GNorm = 1.5280, lr_0 = 1.9306e-04
Validation mae = 0.112393
Epoch 22
Loss = 3.9523e-01, PNorm = 62.4672, GNorm = 1.4921, lr_0 = 1.9292e-04
Loss = 3.2376e-01, PNorm = 62.4698, GNorm = 1.2287, lr_0 = 1.9279e-04
Loss = 4.3352e-01, PNorm = 62.4700, GNorm = 1.3875, lr_0 = 1.9266e-04
Loss = 3.8809e-01, PNorm = 62.4692, GNorm = 1.5057, lr_0 = 1.9253e-04
Loss = 3.4184e-01, PNorm = 62.4731, GNorm = 1.6962, lr_0 = 1.9240e-04
Loss = 3.1736e-01, PNorm = 62.4764, GNorm = 2.6130, lr_0 = 1.9226e-04
Loss = 3.6760e-01, PNorm = 62.4785, GNorm = 1.1787, lr_0 = 1.9213e-04
Loss = 3.4860e-01, PNorm = 62.4824, GNorm = 1.1721, lr_0 = 1.9200e-04
Loss = 3.9520e-01, PNorm = 62.4830, GNorm = 1.2325, lr_0 = 1.9187e-04
Loss = 3.3689e-01, PNorm = 62.4867, GNorm = 1.5175, lr_0 = 1.9174e-04
Loss = 3.1528e-01, PNorm = 62.4893, GNorm = 1.1777, lr_0 = 1.9161e-04
Loss = 4.2404e-01, PNorm = 62.4908, GNorm = 1.3170, lr_0 = 1.9148e-04
Loss = 3.0309e-01, PNorm = 62.4935, GNorm = 1.2967, lr_0 = 1.9134e-04
Loss = 3.3553e-01, PNorm = 62.4985, GNorm = 2.1786, lr_0 = 1.9121e-04
Loss = 3.4439e-01, PNorm = 62.5019, GNorm = 1.5488, lr_0 = 1.9108e-04
Loss = 3.6443e-01, PNorm = 62.5029, GNorm = 1.8509, lr_0 = 1.9095e-04
Loss = 3.5684e-01, PNorm = 62.5043, GNorm = 1.4136, lr_0 = 1.9082e-04
Loss = 3.6367e-01, PNorm = 62.5064, GNorm = 1.4217, lr_0 = 1.9069e-04
Loss = 3.8857e-01, PNorm = 62.5070, GNorm = 1.3544, lr_0 = 1.9056e-04
Loss = 3.5621e-01, PNorm = 62.5093, GNorm = 1.7870, lr_0 = 1.9043e-04
Loss = 3.6386e-01, PNorm = 62.5118, GNorm = 1.6237, lr_0 = 1.9030e-04
Loss = 3.6901e-01, PNorm = 62.5135, GNorm = 1.1206, lr_0 = 1.9017e-04
Loss = 3.7789e-01, PNorm = 62.5169, GNorm = 2.4636, lr_0 = 1.9004e-04
Loss = 3.5448e-01, PNorm = 62.5162, GNorm = 1.1607, lr_0 = 1.8991e-04
Loss = 3.1486e-01, PNorm = 62.5181, GNorm = 1.1925, lr_0 = 1.8978e-04
Loss = 3.5568e-01, PNorm = 62.5201, GNorm = 1.2699, lr_0 = 1.8965e-04
Loss = 3.1195e-01, PNorm = 62.5225, GNorm = 1.8103, lr_0 = 1.8952e-04
Loss = 3.8350e-01, PNorm = 62.5254, GNorm = 1.3986, lr_0 = 1.8939e-04
Loss = 3.7922e-01, PNorm = 62.5284, GNorm = 1.3776, lr_0 = 1.8926e-04
Loss = 3.2930e-01, PNorm = 62.5295, GNorm = 1.4923, lr_0 = 1.8913e-04
Loss = 3.5709e-01, PNorm = 62.5293, GNorm = 1.4786, lr_0 = 1.8900e-04
Loss = 3.9014e-01, PNorm = 62.5319, GNorm = 1.3707, lr_0 = 1.8887e-04
Loss = 3.7573e-01, PNorm = 62.5345, GNorm = 1.5840, lr_0 = 1.8874e-04
Loss = 3.0643e-01, PNorm = 62.5359, GNorm = 1.5763, lr_0 = 1.8861e-04
Loss = 3.8260e-01, PNorm = 62.5376, GNorm = 1.5453, lr_0 = 1.8848e-04
Loss = 3.3610e-01, PNorm = 62.5380, GNorm = 1.3785, lr_0 = 1.8835e-04
Loss = 3.9398e-01, PNorm = 62.5392, GNorm = 2.2859, lr_0 = 1.8822e-04
Loss = 3.4252e-01, PNorm = 62.5417, GNorm = 1.6963, lr_0 = 1.8809e-04
Loss = 3.4243e-01, PNorm = 62.5431, GNorm = 1.8281, lr_0 = 1.8797e-04
Loss = 3.8486e-01, PNorm = 62.5445, GNorm = 1.4125, lr_0 = 1.8784e-04
Loss = 3.5465e-01, PNorm = 62.5469, GNorm = 1.6661, lr_0 = 1.8771e-04
Loss = 3.8211e-01, PNorm = 62.5501, GNorm = 1.2964, lr_0 = 1.8758e-04
Loss = 3.4461e-01, PNorm = 62.5524, GNorm = 1.1305, lr_0 = 1.8745e-04
Loss = 3.6075e-01, PNorm = 62.5538, GNorm = 1.4063, lr_0 = 1.8732e-04
Loss = 3.4321e-01, PNorm = 62.5574, GNorm = 1.4650, lr_0 = 1.8719e-04
Loss = 3.6884e-01, PNorm = 62.5611, GNorm = 1.8170, lr_0 = 1.8707e-04
Loss = 3.5482e-01, PNorm = 62.5652, GNorm = 1.6839, lr_0 = 1.8694e-04
Loss = 3.7521e-01, PNorm = 62.5659, GNorm = 1.4324, lr_0 = 1.8681e-04
Loss = 3.5009e-01, PNorm = 62.5689, GNorm = 1.6703, lr_0 = 1.8668e-04
Loss = 3.9290e-01, PNorm = 62.5738, GNorm = 1.3272, lr_0 = 1.8655e-04
Loss = 3.8503e-01, PNorm = 62.5745, GNorm = 1.2988, lr_0 = 1.8643e-04
Loss = 3.5200e-01, PNorm = 62.5776, GNorm = 0.9804, lr_0 = 1.8630e-04
Loss = 3.6825e-01, PNorm = 62.5791, GNorm = 1.2220, lr_0 = 1.8617e-04
Loss = 3.4937e-01, PNorm = 62.5788, GNorm = 1.5217, lr_0 = 1.8604e-04
Loss = 3.7845e-01, PNorm = 62.5802, GNorm = 1.3835, lr_0 = 1.8592e-04
Loss = 3.9449e-01, PNorm = 62.5821, GNorm = 1.2710, lr_0 = 1.8579e-04
Loss = 3.1693e-01, PNorm = 62.5814, GNorm = 1.5171, lr_0 = 1.8566e-04
Loss = 3.9372e-01, PNorm = 62.5819, GNorm = 1.2491, lr_0 = 1.8553e-04
Loss = 3.3649e-01, PNorm = 62.5843, GNorm = 1.5469, lr_0 = 1.8541e-04
Loss = 3.6474e-01, PNorm = 62.5834, GNorm = 1.7169, lr_0 = 1.8528e-04
Loss = 3.6612e-01, PNorm = 62.5847, GNorm = 1.2777, lr_0 = 1.8515e-04
Loss = 3.3840e-01, PNorm = 62.5847, GNorm = 1.7363, lr_0 = 1.8503e-04
Loss = 3.9344e-01, PNorm = 62.5879, GNorm = 2.1343, lr_0 = 1.8490e-04
Loss = 3.7205e-01, PNorm = 62.5905, GNorm = 1.5748, lr_0 = 1.8477e-04
Loss = 3.3662e-01, PNorm = 62.5946, GNorm = 1.3995, lr_0 = 1.8465e-04
Loss = 3.8591e-01, PNorm = 62.5973, GNorm = 1.7530, lr_0 = 1.8452e-04
Loss = 3.3465e-01, PNorm = 62.5968, GNorm = 1.0033, lr_0 = 1.8439e-04
Loss = 3.6384e-01, PNorm = 62.5982, GNorm = 1.1211, lr_0 = 1.8427e-04
Loss = 3.7204e-01, PNorm = 62.5993, GNorm = 1.4055, lr_0 = 1.8414e-04
Loss = 3.1621e-01, PNorm = 62.6015, GNorm = 1.2720, lr_0 = 1.8401e-04
Loss = 3.7704e-01, PNorm = 62.6029, GNorm = 1.5912, lr_0 = 1.8389e-04
Loss = 3.1595e-01, PNorm = 62.6028, GNorm = 1.1683, lr_0 = 1.8376e-04
Loss = 3.6762e-01, PNorm = 62.6028, GNorm = 1.1033, lr_0 = 1.8364e-04
Loss = 3.0492e-01, PNorm = 62.6043, GNorm = 1.3982, lr_0 = 1.8351e-04
Loss = 3.3733e-01, PNorm = 62.6078, GNorm = 1.2279, lr_0 = 1.8338e-04
Loss = 4.2178e-01, PNorm = 62.6088, GNorm = 1.5920, lr_0 = 1.8326e-04
Loss = 3.1653e-01, PNorm = 62.6095, GNorm = 1.5372, lr_0 = 1.8313e-04
Loss = 3.4567e-01, PNorm = 62.6085, GNorm = 1.3208, lr_0 = 1.8301e-04
Loss = 3.4089e-01, PNorm = 62.6076, GNorm = 1.1116, lr_0 = 1.8288e-04
Loss = 3.5484e-01, PNorm = 62.6109, GNorm = 1.1286, lr_0 = 1.8276e-04
Loss = 3.8206e-01, PNorm = 62.6126, GNorm = 1.4788, lr_0 = 1.8263e-04
Loss = 3.8895e-01, PNorm = 62.6125, GNorm = 1.6240, lr_0 = 1.8251e-04
Loss = 3.0485e-01, PNorm = 62.6112, GNorm = 1.4396, lr_0 = 1.8238e-04
Loss = 3.3144e-01, PNorm = 62.6133, GNorm = 1.4424, lr_0 = 1.8226e-04
Loss = 3.6161e-01, PNorm = 62.6170, GNorm = 1.5246, lr_0 = 1.8213e-04
Loss = 3.6870e-01, PNorm = 62.6182, GNorm = 1.3418, lr_0 = 1.8201e-04
Loss = 3.9019e-01, PNorm = 62.6176, GNorm = 1.2526, lr_0 = 1.8188e-04
Loss = 3.9922e-01, PNorm = 62.6188, GNorm = 1.7957, lr_0 = 1.8176e-04
Loss = 3.2985e-01, PNorm = 62.6219, GNorm = 1.3492, lr_0 = 1.8163e-04
Loss = 3.1273e-01, PNorm = 62.6235, GNorm = 1.0778, lr_0 = 1.8151e-04
Loss = 3.8130e-01, PNorm = 62.6254, GNorm = 1.2457, lr_0 = 1.8138e-04
Loss = 3.3175e-01, PNorm = 62.6299, GNorm = 1.3947, lr_0 = 1.8126e-04
Loss = 4.0639e-01, PNorm = 62.6316, GNorm = 1.9113, lr_0 = 1.8114e-04
Loss = 3.1099e-01, PNorm = 62.6319, GNorm = 1.2336, lr_0 = 1.8101e-04
Loss = 3.5018e-01, PNorm = 62.6318, GNorm = 1.9377, lr_0 = 1.8089e-04
Loss = 3.1633e-01, PNorm = 62.6335, GNorm = 1.2085, lr_0 = 1.8076e-04
Loss = 3.7793e-01, PNorm = 62.6353, GNorm = 1.0412, lr_0 = 1.8064e-04
Loss = 3.4537e-01, PNorm = 62.6390, GNorm = 2.6957, lr_0 = 1.8052e-04
Loss = 3.4582e-01, PNorm = 62.6427, GNorm = 1.8140, lr_0 = 1.8039e-04
Loss = 3.7375e-01, PNorm = 62.6434, GNorm = 1.4232, lr_0 = 1.8027e-04
Loss = 3.7531e-01, PNorm = 62.6423, GNorm = 1.8584, lr_0 = 1.8015e-04
Loss = 3.4006e-01, PNorm = 62.6465, GNorm = 1.3765, lr_0 = 1.8002e-04
Loss = 3.4455e-01, PNorm = 62.6483, GNorm = 1.9323, lr_0 = 1.7990e-04
Loss = 3.3111e-01, PNorm = 62.6516, GNorm = 2.2179, lr_0 = 1.7978e-04
Loss = 3.8004e-01, PNorm = 62.6538, GNorm = 1.5056, lr_0 = 1.7965e-04
Loss = 3.4956e-01, PNorm = 62.6566, GNorm = 1.3024, lr_0 = 1.7953e-04
Loss = 3.9984e-01, PNorm = 62.6588, GNorm = 2.0847, lr_0 = 1.7941e-04
Loss = 3.1217e-01, PNorm = 62.6611, GNorm = 1.1500, lr_0 = 1.7928e-04
Loss = 3.3027e-01, PNorm = 62.6637, GNorm = 1.9362, lr_0 = 1.7916e-04
Loss = 3.8742e-01, PNorm = 62.6686, GNorm = 1.4646, lr_0 = 1.7904e-04
Loss = 3.7106e-01, PNorm = 62.6690, GNorm = 1.8709, lr_0 = 1.7892e-04
Loss = 3.4346e-01, PNorm = 62.6734, GNorm = 1.7346, lr_0 = 1.7879e-04
Loss = 3.4765e-01, PNorm = 62.6788, GNorm = 1.8318, lr_0 = 1.7867e-04
Loss = 3.1601e-01, PNorm = 62.6803, GNorm = 1.2872, lr_0 = 1.7855e-04
Loss = 3.4616e-01, PNorm = 62.6808, GNorm = 1.2777, lr_0 = 1.7843e-04
Loss = 3.2528e-01, PNorm = 62.6824, GNorm = 1.2579, lr_0 = 1.7830e-04
Loss = 3.5537e-01, PNorm = 62.6830, GNorm = 1.6621, lr_0 = 1.7818e-04
Loss = 3.5332e-01, PNorm = 62.6852, GNorm = 1.3022, lr_0 = 1.7806e-04
Loss = 3.5578e-01, PNorm = 62.6872, GNorm = 2.0254, lr_0 = 1.7794e-04
Loss = 3.9908e-01, PNorm = 62.6852, GNorm = 1.7205, lr_0 = 1.7782e-04
Validation mae = 0.112197
Epoch 23
Loss = 3.7799e-01, PNorm = 62.6867, GNorm = 0.9545, lr_0 = 1.7769e-04
Loss = 3.7051e-01, PNorm = 62.6896, GNorm = 1.3853, lr_0 = 1.7757e-04
Loss = 3.8550e-01, PNorm = 62.6908, GNorm = 1.2558, lr_0 = 1.7745e-04
Loss = 3.3820e-01, PNorm = 62.6939, GNorm = 1.2909, lr_0 = 1.7733e-04
Loss = 2.8884e-01, PNorm = 62.6971, GNorm = 1.6355, lr_0 = 1.7721e-04
Loss = 3.5807e-01, PNorm = 62.6987, GNorm = 1.1447, lr_0 = 1.7709e-04
Loss = 3.7741e-01, PNorm = 62.7008, GNorm = 1.7259, lr_0 = 1.7696e-04
Loss = 3.3054e-01, PNorm = 62.7008, GNorm = 1.6835, lr_0 = 1.7684e-04
Loss = 3.7032e-01, PNorm = 62.7020, GNorm = 1.4630, lr_0 = 1.7672e-04
Loss = 3.1943e-01, PNorm = 62.7044, GNorm = 1.6995, lr_0 = 1.7660e-04
Loss = 2.9899e-01, PNorm = 62.7051, GNorm = 1.6566, lr_0 = 1.7648e-04
Loss = 3.5234e-01, PNorm = 62.7069, GNorm = 1.7337, lr_0 = 1.7636e-04
Loss = 3.6266e-01, PNorm = 62.7067, GNorm = 1.5889, lr_0 = 1.7624e-04
Loss = 3.6517e-01, PNorm = 62.7097, GNorm = 1.3712, lr_0 = 1.7612e-04
Loss = 3.3285e-01, PNorm = 62.7141, GNorm = 1.1528, lr_0 = 1.7600e-04
Loss = 3.0646e-01, PNorm = 62.7156, GNorm = 0.9935, lr_0 = 1.7588e-04
Loss = 3.7275e-01, PNorm = 62.7183, GNorm = 1.6273, lr_0 = 1.7576e-04
Loss = 3.4768e-01, PNorm = 62.7189, GNorm = 1.5705, lr_0 = 1.7564e-04
Loss = 3.7177e-01, PNorm = 62.7210, GNorm = 1.5358, lr_0 = 1.7552e-04
Loss = 3.4911e-01, PNorm = 62.7211, GNorm = 1.1155, lr_0 = 1.7540e-04
Loss = 3.4368e-01, PNorm = 62.7229, GNorm = 1.4028, lr_0 = 1.7528e-04
Loss = 3.3842e-01, PNorm = 62.7253, GNorm = 1.6535, lr_0 = 1.7516e-04
Loss = 3.9165e-01, PNorm = 62.7258, GNorm = 1.4134, lr_0 = 1.7504e-04
Loss = 3.7235e-01, PNorm = 62.7298, GNorm = 1.6802, lr_0 = 1.7492e-04
Loss = 3.7446e-01, PNorm = 62.7308, GNorm = 1.4865, lr_0 = 1.7480e-04
Loss = 3.6695e-01, PNorm = 62.7316, GNorm = 1.3275, lr_0 = 1.7468e-04
Loss = 3.6047e-01, PNorm = 62.7361, GNorm = 1.0941, lr_0 = 1.7456e-04
Loss = 4.2514e-01, PNorm = 62.7394, GNorm = 1.3561, lr_0 = 1.7444e-04
Loss = 3.5214e-01, PNorm = 62.7426, GNorm = 1.5449, lr_0 = 1.7432e-04
Loss = 3.5234e-01, PNorm = 62.7437, GNorm = 1.5498, lr_0 = 1.7420e-04
Loss = 3.4347e-01, PNorm = 62.7456, GNorm = 1.5434, lr_0 = 1.7408e-04
Loss = 3.5744e-01, PNorm = 62.7470, GNorm = 1.6195, lr_0 = 1.7396e-04
Loss = 3.4469e-01, PNorm = 62.7478, GNorm = 1.7185, lr_0 = 1.7384e-04
Loss = 3.2459e-01, PNorm = 62.7517, GNorm = 1.6126, lr_0 = 1.7372e-04
Loss = 3.1221e-01, PNorm = 62.7533, GNorm = 1.2086, lr_0 = 1.7360e-04
Loss = 4.3428e-01, PNorm = 62.7540, GNorm = 2.0333, lr_0 = 1.7348e-04
Loss = 3.3289e-01, PNorm = 62.7569, GNorm = 1.3748, lr_0 = 1.7336e-04
Loss = 3.6342e-01, PNorm = 62.7580, GNorm = 1.3755, lr_0 = 1.7325e-04
Loss = 3.4636e-01, PNorm = 62.7597, GNorm = 1.5389, lr_0 = 1.7313e-04
Loss = 3.9209e-01, PNorm = 62.7611, GNorm = 1.5779, lr_0 = 1.7301e-04
Loss = 3.2915e-01, PNorm = 62.7619, GNorm = 1.2934, lr_0 = 1.7289e-04
Loss = 3.8191e-01, PNorm = 62.7657, GNorm = 1.2551, lr_0 = 1.7277e-04
Loss = 3.3961e-01, PNorm = 62.7680, GNorm = 1.3180, lr_0 = 1.7265e-04
Loss = 3.5767e-01, PNorm = 62.7681, GNorm = 1.4705, lr_0 = 1.7253e-04
Loss = 3.3295e-01, PNorm = 62.7703, GNorm = 1.2893, lr_0 = 1.7242e-04
Loss = 2.9758e-01, PNorm = 62.7716, GNorm = 1.4529, lr_0 = 1.7230e-04
Loss = 3.6970e-01, PNorm = 62.7759, GNorm = 1.6662, lr_0 = 1.7218e-04
Loss = 3.2117e-01, PNorm = 62.7788, GNorm = 2.0484, lr_0 = 1.7206e-04
Loss = 4.1530e-01, PNorm = 62.7785, GNorm = 1.8014, lr_0 = 1.7194e-04
Loss = 3.7861e-01, PNorm = 62.7812, GNorm = 1.6944, lr_0 = 1.7183e-04
Loss = 3.4742e-01, PNorm = 62.7820, GNorm = 0.9687, lr_0 = 1.7171e-04
Loss = 4.0005e-01, PNorm = 62.7834, GNorm = 1.8028, lr_0 = 1.7159e-04
Loss = 3.4501e-01, PNorm = 62.7835, GNorm = 2.1180, lr_0 = 1.7147e-04
Loss = 3.6868e-01, PNorm = 62.7829, GNorm = 1.0634, lr_0 = 1.7136e-04
Loss = 3.7798e-01, PNorm = 62.7863, GNorm = 1.7149, lr_0 = 1.7124e-04
Loss = 3.7425e-01, PNorm = 62.7894, GNorm = 1.3893, lr_0 = 1.7112e-04
Loss = 3.5788e-01, PNorm = 62.7907, GNorm = 1.3536, lr_0 = 1.7100e-04
Loss = 3.4144e-01, PNorm = 62.7931, GNorm = 1.4183, lr_0 = 1.7089e-04
Loss = 3.6742e-01, PNorm = 62.7938, GNorm = 1.6170, lr_0 = 1.7077e-04
Loss = 3.7060e-01, PNorm = 62.7953, GNorm = 1.5015, lr_0 = 1.7065e-04
Loss = 3.8052e-01, PNorm = 62.7980, GNorm = 1.3185, lr_0 = 1.7054e-04
Loss = 3.5805e-01, PNorm = 62.7989, GNorm = 1.5215, lr_0 = 1.7042e-04
Loss = 3.8336e-01, PNorm = 62.8012, GNorm = 1.9878, lr_0 = 1.7030e-04
Loss = 3.8447e-01, PNorm = 62.8028, GNorm = 1.6866, lr_0 = 1.7019e-04
Loss = 3.3042e-01, PNorm = 62.8053, GNorm = 1.4245, lr_0 = 1.7007e-04
Loss = 3.9553e-01, PNorm = 62.8071, GNorm = 1.6883, lr_0 = 1.6995e-04
Loss = 3.5666e-01, PNorm = 62.8082, GNorm = 1.9298, lr_0 = 1.6984e-04
Loss = 3.2381e-01, PNorm = 62.8081, GNorm = 1.5892, lr_0 = 1.6972e-04
Loss = 3.2054e-01, PNorm = 62.8098, GNorm = 1.1803, lr_0 = 1.6960e-04
Loss = 3.6161e-01, PNorm = 62.8107, GNorm = 1.7777, lr_0 = 1.6949e-04
Loss = 3.0866e-01, PNorm = 62.8130, GNorm = 1.2034, lr_0 = 1.6937e-04
Loss = 3.7259e-01, PNorm = 62.8133, GNorm = 1.3995, lr_0 = 1.6926e-04
Loss = 3.8293e-01, PNorm = 62.8155, GNorm = 1.5121, lr_0 = 1.6914e-04
Loss = 3.5647e-01, PNorm = 62.8154, GNorm = 1.4643, lr_0 = 1.6902e-04
Loss = 3.6022e-01, PNorm = 62.8177, GNorm = 1.7580, lr_0 = 1.6891e-04
Loss = 3.4418e-01, PNorm = 62.8192, GNorm = 1.5136, lr_0 = 1.6879e-04
Loss = 2.8442e-01, PNorm = 62.8207, GNorm = 1.3079, lr_0 = 1.6868e-04
Loss = 3.6029e-01, PNorm = 62.8212, GNorm = 1.4477, lr_0 = 1.6856e-04
Loss = 3.1330e-01, PNorm = 62.8221, GNorm = 1.5230, lr_0 = 1.6845e-04
Loss = 3.5743e-01, PNorm = 62.8249, GNorm = 1.4896, lr_0 = 1.6833e-04
Loss = 3.4618e-01, PNorm = 62.8275, GNorm = 1.4075, lr_0 = 1.6821e-04
Loss = 3.5536e-01, PNorm = 62.8301, GNorm = 1.9553, lr_0 = 1.6810e-04
Loss = 3.4092e-01, PNorm = 62.8317, GNorm = 1.4375, lr_0 = 1.6798e-04
Loss = 3.4583e-01, PNorm = 62.8325, GNorm = 1.1034, lr_0 = 1.6787e-04
Loss = 3.5844e-01, PNorm = 62.8330, GNorm = 1.6056, lr_0 = 1.6775e-04
Loss = 3.4449e-01, PNorm = 62.8351, GNorm = 1.6919, lr_0 = 1.6764e-04
Loss = 3.2978e-01, PNorm = 62.8376, GNorm = 2.0359, lr_0 = 1.6752e-04
Loss = 3.4095e-01, PNorm = 62.8422, GNorm = 1.4661, lr_0 = 1.6741e-04
Loss = 3.3777e-01, PNorm = 62.8426, GNorm = 1.4607, lr_0 = 1.6729e-04
Loss = 3.7728e-01, PNorm = 62.8442, GNorm = 1.4143, lr_0 = 1.6718e-04
Loss = 3.6309e-01, PNorm = 62.8476, GNorm = 1.0188, lr_0 = 1.6707e-04
Loss = 3.3033e-01, PNorm = 62.8493, GNorm = 1.4265, lr_0 = 1.6695e-04
Loss = 3.7160e-01, PNorm = 62.8518, GNorm = 1.4690, lr_0 = 1.6684e-04
Loss = 3.0372e-01, PNorm = 62.8539, GNorm = 1.4822, lr_0 = 1.6672e-04
Loss = 3.3501e-01, PNorm = 62.8575, GNorm = 1.3635, lr_0 = 1.6661e-04
Loss = 3.2223e-01, PNorm = 62.8593, GNorm = 1.2590, lr_0 = 1.6649e-04
Loss = 3.6548e-01, PNorm = 62.8594, GNorm = 1.9857, lr_0 = 1.6638e-04
Loss = 3.4054e-01, PNorm = 62.8601, GNorm = 1.2571, lr_0 = 1.6627e-04
Loss = 3.2125e-01, PNorm = 62.8603, GNorm = 1.6847, lr_0 = 1.6615e-04
Loss = 3.7645e-01, PNorm = 62.8632, GNorm = 2.1479, lr_0 = 1.6604e-04
Loss = 3.8962e-01, PNorm = 62.8646, GNorm = 1.5800, lr_0 = 1.6592e-04
Loss = 3.3571e-01, PNorm = 62.8652, GNorm = 1.4117, lr_0 = 1.6581e-04
Loss = 3.4439e-01, PNorm = 62.8647, GNorm = 1.5138, lr_0 = 1.6570e-04
Loss = 4.2084e-01, PNorm = 62.8673, GNorm = 1.3648, lr_0 = 1.6558e-04
Loss = 3.4224e-01, PNorm = 62.8699, GNorm = 1.0497, lr_0 = 1.6547e-04
Loss = 3.1938e-01, PNorm = 62.8709, GNorm = 1.5102, lr_0 = 1.6536e-04
Loss = 3.1293e-01, PNorm = 62.8715, GNorm = 1.2890, lr_0 = 1.6524e-04
Loss = 3.4271e-01, PNorm = 62.8719, GNorm = 1.3858, lr_0 = 1.6513e-04
Loss = 3.9074e-01, PNorm = 62.8732, GNorm = 1.3506, lr_0 = 1.6502e-04
Loss = 3.2770e-01, PNorm = 62.8760, GNorm = 1.1243, lr_0 = 1.6490e-04
Loss = 3.6604e-01, PNorm = 62.8757, GNorm = 1.4131, lr_0 = 1.6479e-04
Loss = 3.4581e-01, PNorm = 62.8774, GNorm = 1.5387, lr_0 = 1.6468e-04
Loss = 3.6730e-01, PNorm = 62.8818, GNorm = 1.6789, lr_0 = 1.6457e-04
Loss = 3.4288e-01, PNorm = 62.8817, GNorm = 1.3870, lr_0 = 1.6445e-04
Loss = 3.7513e-01, PNorm = 62.8816, GNorm = 1.2117, lr_0 = 1.6434e-04
Loss = 3.6609e-01, PNorm = 62.8816, GNorm = 1.6108, lr_0 = 1.6423e-04
Loss = 3.5702e-01, PNorm = 62.8832, GNorm = 1.4644, lr_0 = 1.6412e-04
Loss = 3.6679e-01, PNorm = 62.8859, GNorm = 1.9783, lr_0 = 1.6400e-04
Loss = 3.3483e-01, PNorm = 62.8871, GNorm = 1.5033, lr_0 = 1.6389e-04
Loss = 3.5176e-01, PNorm = 62.8887, GNorm = 1.6288, lr_0 = 1.6378e-04
Validation mae = 0.111115
Epoch 24
Loss = 3.5707e-01, PNorm = 62.8891, GNorm = 1.5464, lr_0 = 1.6367e-04
Loss = 3.4877e-01, PNorm = 62.8903, GNorm = 1.2682, lr_0 = 1.6355e-04
Loss = 3.2662e-01, PNorm = 62.8928, GNorm = 1.5136, lr_0 = 1.6344e-04
Loss = 3.3240e-01, PNorm = 62.8955, GNorm = 1.4566, lr_0 = 1.6333e-04
Loss = 2.9972e-01, PNorm = 62.8965, GNorm = 1.4984, lr_0 = 1.6322e-04
Loss = 3.6571e-01, PNorm = 62.8981, GNorm = 1.3590, lr_0 = 1.6311e-04
Loss = 3.4486e-01, PNorm = 62.8996, GNorm = 2.2375, lr_0 = 1.6299e-04
Loss = 4.1728e-01, PNorm = 62.8996, GNorm = 1.4103, lr_0 = 1.6288e-04
Loss = 3.2734e-01, PNorm = 62.9032, GNorm = 1.5768, lr_0 = 1.6277e-04
Loss = 3.8308e-01, PNorm = 62.9069, GNorm = 2.1084, lr_0 = 1.6266e-04
Loss = 3.6042e-01, PNorm = 62.9079, GNorm = 1.0983, lr_0 = 1.6255e-04
Loss = 2.8772e-01, PNorm = 62.9107, GNorm = 1.2892, lr_0 = 1.6244e-04
Loss = 3.5606e-01, PNorm = 62.9113, GNorm = 1.3882, lr_0 = 1.6233e-04
Loss = 3.4165e-01, PNorm = 62.9117, GNorm = 1.4190, lr_0 = 1.6221e-04
Loss = 3.8866e-01, PNorm = 62.9104, GNorm = 1.5057, lr_0 = 1.6210e-04
Loss = 3.4201e-01, PNorm = 62.9120, GNorm = 1.8751, lr_0 = 1.6199e-04
Loss = 3.4015e-01, PNorm = 62.9126, GNorm = 1.3582, lr_0 = 1.6188e-04
Loss = 3.5070e-01, PNorm = 62.9140, GNorm = 1.3376, lr_0 = 1.6177e-04
Loss = 3.0462e-01, PNorm = 62.9173, GNorm = 1.2837, lr_0 = 1.6166e-04
Loss = 3.8722e-01, PNorm = 62.9180, GNorm = 1.7787, lr_0 = 1.6155e-04
Loss = 3.4143e-01, PNorm = 62.9209, GNorm = 1.2405, lr_0 = 1.6144e-04
Loss = 3.6017e-01, PNorm = 62.9230, GNorm = 1.8468, lr_0 = 1.6133e-04
Loss = 4.1871e-01, PNorm = 62.9236, GNorm = 1.6762, lr_0 = 1.6122e-04
Loss = 4.2585e-01, PNorm = 62.9251, GNorm = 2.1853, lr_0 = 1.6111e-04
Loss = 4.2333e-01, PNorm = 62.9286, GNorm = 1.8243, lr_0 = 1.6100e-04
Loss = 3.4466e-01, PNorm = 62.9282, GNorm = 1.3826, lr_0 = 1.6089e-04
Loss = 3.5833e-01, PNorm = 62.9275, GNorm = 1.6889, lr_0 = 1.6078e-04
Loss = 3.9352e-01, PNorm = 62.9301, GNorm = 1.9731, lr_0 = 1.6067e-04
Loss = 3.4491e-01, PNorm = 62.9329, GNorm = 1.3406, lr_0 = 1.6056e-04
Loss = 3.9152e-01, PNorm = 62.9340, GNorm = 1.4881, lr_0 = 1.6045e-04
Loss = 2.6647e-01, PNorm = 62.9373, GNorm = 1.1630, lr_0 = 1.6034e-04
Loss = 3.9651e-01, PNorm = 62.9379, GNorm = 1.5580, lr_0 = 1.6023e-04
Loss = 3.5132e-01, PNorm = 62.9380, GNorm = 1.3126, lr_0 = 1.6012e-04
Loss = 3.5703e-01, PNorm = 62.9379, GNorm = 1.2704, lr_0 = 1.6001e-04
Loss = 3.3693e-01, PNorm = 62.9411, GNorm = 1.1975, lr_0 = 1.5990e-04
Loss = 4.0624e-01, PNorm = 62.9443, GNorm = 1.5533, lr_0 = 1.5979e-04
Loss = 3.3463e-01, PNorm = 62.9467, GNorm = 1.2143, lr_0 = 1.5968e-04
Loss = 3.6502e-01, PNorm = 62.9478, GNorm = 1.5255, lr_0 = 1.5957e-04
Loss = 2.9235e-01, PNorm = 62.9479, GNorm = 1.4896, lr_0 = 1.5946e-04
Loss = 3.1507e-01, PNorm = 62.9490, GNorm = 1.1805, lr_0 = 1.5935e-04
Loss = 4.2906e-01, PNorm = 62.9504, GNorm = 1.5584, lr_0 = 1.5924e-04
Loss = 3.2410e-01, PNorm = 62.9532, GNorm = 1.7225, lr_0 = 1.5913e-04
Loss = 3.1607e-01, PNorm = 62.9550, GNorm = 1.2607, lr_0 = 1.5902e-04
Loss = 3.5681e-01, PNorm = 62.9557, GNorm = 1.3093, lr_0 = 1.5891e-04
Loss = 3.4078e-01, PNorm = 62.9562, GNorm = 1.4773, lr_0 = 1.5880e-04
Loss = 3.6110e-01, PNorm = 62.9549, GNorm = 1.4711, lr_0 = 1.5870e-04
Loss = 3.9027e-01, PNorm = 62.9545, GNorm = 1.5945, lr_0 = 1.5859e-04
Loss = 3.5223e-01, PNorm = 62.9543, GNorm = 1.6555, lr_0 = 1.5848e-04
Loss = 3.6713e-01, PNorm = 62.9564, GNorm = 2.0637, lr_0 = 1.5837e-04
Loss = 3.3470e-01, PNorm = 62.9573, GNorm = 1.3973, lr_0 = 1.5826e-04
Loss = 2.7434e-01, PNorm = 62.9579, GNorm = 1.5479, lr_0 = 1.5815e-04
Loss = 3.2961e-01, PNorm = 62.9608, GNorm = 2.0602, lr_0 = 1.5804e-04
Loss = 3.6367e-01, PNorm = 62.9616, GNorm = 1.3650, lr_0 = 1.5794e-04
Loss = 3.7775e-01, PNorm = 62.9629, GNorm = 1.4254, lr_0 = 1.5783e-04
Loss = 3.7161e-01, PNorm = 62.9658, GNorm = 1.9200, lr_0 = 1.5772e-04
Loss = 3.9982e-01, PNorm = 62.9680, GNorm = 2.0000, lr_0 = 1.5761e-04
Loss = 3.6557e-01, PNorm = 62.9690, GNorm = 1.3728, lr_0 = 1.5750e-04
Loss = 4.5273e-01, PNorm = 62.9688, GNorm = 1.8004, lr_0 = 1.5740e-04
Loss = 3.4271e-01, PNorm = 62.9688, GNorm = 1.3882, lr_0 = 1.5729e-04
Loss = 3.3487e-01, PNorm = 62.9717, GNorm = 1.7452, lr_0 = 1.5718e-04
Loss = 3.1528e-01, PNorm = 62.9744, GNorm = 1.6620, lr_0 = 1.5707e-04
Loss = 3.5327e-01, PNorm = 62.9754, GNorm = 1.2732, lr_0 = 1.5697e-04
Loss = 3.2451e-01, PNorm = 62.9792, GNorm = 1.4137, lr_0 = 1.5686e-04
Loss = 3.3374e-01, PNorm = 62.9832, GNorm = 1.5021, lr_0 = 1.5675e-04
Loss = 3.2993e-01, PNorm = 62.9841, GNorm = 1.3879, lr_0 = 1.5664e-04
Loss = 3.6631e-01, PNorm = 62.9838, GNorm = 1.7817, lr_0 = 1.5654e-04
Loss = 3.4441e-01, PNorm = 62.9850, GNorm = 1.5736, lr_0 = 1.5643e-04
Loss = 3.6358e-01, PNorm = 62.9871, GNorm = 1.2592, lr_0 = 1.5632e-04
Loss = 3.8876e-01, PNorm = 62.9879, GNorm = 0.9691, lr_0 = 1.5621e-04
Loss = 3.4386e-01, PNorm = 62.9892, GNorm = 1.1273, lr_0 = 1.5611e-04
Loss = 3.1471e-01, PNorm = 62.9913, GNorm = 1.3348, lr_0 = 1.5600e-04
Loss = 3.8734e-01, PNorm = 62.9942, GNorm = 1.2763, lr_0 = 1.5589e-04
Loss = 3.5282e-01, PNorm = 62.9955, GNorm = 1.7191, lr_0 = 1.5579e-04
Loss = 3.4307e-01, PNorm = 62.9969, GNorm = 1.5255, lr_0 = 1.5568e-04
Loss = 3.5931e-01, PNorm = 63.0007, GNorm = 1.8214, lr_0 = 1.5557e-04
Loss = 3.8205e-01, PNorm = 63.0014, GNorm = 1.1313, lr_0 = 1.5547e-04
Loss = 3.2617e-01, PNorm = 63.0039, GNorm = 1.2109, lr_0 = 1.5536e-04
Loss = 3.3099e-01, PNorm = 63.0060, GNorm = 1.4680, lr_0 = 1.5525e-04
Loss = 3.3826e-01, PNorm = 63.0061, GNorm = 1.2311, lr_0 = 1.5515e-04
Loss = 3.6906e-01, PNorm = 63.0055, GNorm = 1.5480, lr_0 = 1.5504e-04
Loss = 3.7384e-01, PNorm = 63.0072, GNorm = 1.4992, lr_0 = 1.5493e-04
Loss = 3.5760e-01, PNorm = 63.0091, GNorm = 1.3699, lr_0 = 1.5483e-04
Loss = 3.0280e-01, PNorm = 63.0139, GNorm = 1.2363, lr_0 = 1.5472e-04
Loss = 3.2360e-01, PNorm = 63.0136, GNorm = 1.5859, lr_0 = 1.5462e-04
Loss = 3.6597e-01, PNorm = 63.0133, GNorm = 1.4693, lr_0 = 1.5451e-04
Loss = 3.3553e-01, PNorm = 63.0153, GNorm = 1.3816, lr_0 = 1.5440e-04
Loss = 3.7098e-01, PNorm = 63.0172, GNorm = 2.4481, lr_0 = 1.5430e-04
Loss = 3.6766e-01, PNorm = 63.0183, GNorm = 1.6829, lr_0 = 1.5419e-04
Loss = 3.6571e-01, PNorm = 63.0207, GNorm = 1.5024, lr_0 = 1.5409e-04
Loss = 3.2271e-01, PNorm = 63.0228, GNorm = 1.7039, lr_0 = 1.5398e-04
Loss = 3.4128e-01, PNorm = 63.0222, GNorm = 1.1331, lr_0 = 1.5388e-04
Loss = 3.3614e-01, PNorm = 63.0247, GNorm = 2.0155, lr_0 = 1.5377e-04
Loss = 3.9132e-01, PNorm = 63.0248, GNorm = 2.4211, lr_0 = 1.5367e-04
Loss = 3.6685e-01, PNorm = 63.0255, GNorm = 1.6071, lr_0 = 1.5356e-04
Loss = 3.9804e-01, PNorm = 63.0284, GNorm = 1.2639, lr_0 = 1.5346e-04
Loss = 3.3209e-01, PNorm = 63.0307, GNorm = 1.6316, lr_0 = 1.5335e-04
Loss = 3.8760e-01, PNorm = 63.0304, GNorm = 1.7876, lr_0 = 1.5325e-04
Loss = 3.6380e-01, PNorm = 63.0353, GNorm = 1.4094, lr_0 = 1.5314e-04
Loss = 3.0508e-01, PNorm = 63.0375, GNorm = 1.3962, lr_0 = 1.5304e-04
Loss = 3.4834e-01, PNorm = 63.0369, GNorm = 1.8885, lr_0 = 1.5293e-04
Loss = 4.2438e-01, PNorm = 63.0361, GNorm = 1.6701, lr_0 = 1.5283e-04
Loss = 3.5108e-01, PNorm = 63.0386, GNorm = 1.4820, lr_0 = 1.5272e-04
Loss = 3.3816e-01, PNorm = 63.0393, GNorm = 1.3899, lr_0 = 1.5262e-04
Loss = 2.8490e-01, PNorm = 63.0390, GNorm = 1.4870, lr_0 = 1.5251e-04
Loss = 3.3397e-01, PNorm = 63.0398, GNorm = 1.6085, lr_0 = 1.5241e-04
Loss = 3.7421e-01, PNorm = 63.0423, GNorm = 1.3431, lr_0 = 1.5230e-04
Loss = 3.5214e-01, PNorm = 63.0438, GNorm = 1.4117, lr_0 = 1.5220e-04
Loss = 3.0485e-01, PNorm = 63.0450, GNorm = 1.4335, lr_0 = 1.5209e-04
Loss = 3.8234e-01, PNorm = 63.0466, GNorm = 1.5663, lr_0 = 1.5199e-04
Loss = 3.4016e-01, PNorm = 63.0485, GNorm = 1.2545, lr_0 = 1.5189e-04
Loss = 3.4897e-01, PNorm = 63.0499, GNorm = 1.5874, lr_0 = 1.5178e-04
Loss = 3.4881e-01, PNorm = 63.0507, GNorm = 1.7449, lr_0 = 1.5168e-04
Loss = 3.1175e-01, PNorm = 63.0522, GNorm = 1.3624, lr_0 = 1.5157e-04
Loss = 2.9320e-01, PNorm = 63.0526, GNorm = 1.5464, lr_0 = 1.5147e-04
Loss = 3.3902e-01, PNorm = 63.0545, GNorm = 1.3579, lr_0 = 1.5137e-04
Loss = 3.0183e-01, PNorm = 63.0561, GNorm = 1.2957, lr_0 = 1.5126e-04
Loss = 3.6487e-01, PNorm = 63.0565, GNorm = 1.7036, lr_0 = 1.5116e-04
Loss = 3.2985e-01, PNorm = 63.0581, GNorm = 1.7660, lr_0 = 1.5106e-04
Loss = 3.4271e-01, PNorm = 63.0611, GNorm = 1.3761, lr_0 = 1.5095e-04
Loss = 3.4446e-01, PNorm = 63.0627, GNorm = 1.6558, lr_0 = 1.5085e-04
Validation mae = 0.111295
Epoch 25
Loss = 3.1999e-01, PNorm = 63.0629, GNorm = 1.3771, lr_0 = 1.5075e-04
Loss = 4.0013e-01, PNorm = 63.0637, GNorm = 1.4939, lr_0 = 1.5064e-04
Loss = 3.3216e-01, PNorm = 63.0657, GNorm = 1.3481, lr_0 = 1.5054e-04
Loss = 3.2208e-01, PNorm = 63.0656, GNorm = 1.8989, lr_0 = 1.5044e-04
Loss = 3.7071e-01, PNorm = 63.0678, GNorm = 1.5964, lr_0 = 1.5033e-04
Loss = 3.3176e-01, PNorm = 63.0701, GNorm = 0.9360, lr_0 = 1.5023e-04
Loss = 3.3580e-01, PNorm = 63.0704, GNorm = 1.3413, lr_0 = 1.5013e-04
Loss = 3.6535e-01, PNorm = 63.0699, GNorm = 1.4624, lr_0 = 1.5002e-04
Loss = 3.6742e-01, PNorm = 63.0708, GNorm = 2.1052, lr_0 = 1.4992e-04
Loss = 3.3268e-01, PNorm = 63.0701, GNorm = 1.3739, lr_0 = 1.4982e-04
Loss = 3.6731e-01, PNorm = 63.0714, GNorm = 1.1265, lr_0 = 1.4972e-04
Loss = 3.2756e-01, PNorm = 63.0755, GNorm = 1.1745, lr_0 = 1.4961e-04
Loss = 3.5281e-01, PNorm = 63.0789, GNorm = 1.1364, lr_0 = 1.4951e-04
Loss = 3.3000e-01, PNorm = 63.0815, GNorm = 1.7810, lr_0 = 1.4941e-04
Loss = 3.4077e-01, PNorm = 63.0827, GNorm = 1.5841, lr_0 = 1.4931e-04
Loss = 3.2888e-01, PNorm = 63.0833, GNorm = 1.3433, lr_0 = 1.4920e-04
Loss = 3.2915e-01, PNorm = 63.0823, GNorm = 1.8061, lr_0 = 1.4910e-04
Loss = 4.6934e-01, PNorm = 63.0834, GNorm = 1.9877, lr_0 = 1.4900e-04
Loss = 3.5792e-01, PNorm = 63.0840, GNorm = 1.6737, lr_0 = 1.4890e-04
Loss = 3.2951e-01, PNorm = 63.0840, GNorm = 1.6639, lr_0 = 1.4880e-04
Loss = 3.7814e-01, PNorm = 63.0858, GNorm = 1.0443, lr_0 = 1.4869e-04
Loss = 4.0723e-01, PNorm = 63.0856, GNorm = 1.4143, lr_0 = 1.4859e-04
Loss = 3.6771e-01, PNorm = 63.0875, GNorm = 1.3146, lr_0 = 1.4849e-04
Loss = 2.9966e-01, PNorm = 63.0898, GNorm = 0.9710, lr_0 = 1.4839e-04
Loss = 3.6859e-01, PNorm = 63.0916, GNorm = 1.0072, lr_0 = 1.4829e-04
Loss = 3.6390e-01, PNorm = 63.0920, GNorm = 1.4547, lr_0 = 1.4818e-04
Loss = 3.1719e-01, PNorm = 63.0932, GNorm = 1.2431, lr_0 = 1.4808e-04
Loss = 3.1876e-01, PNorm = 63.0954, GNorm = 1.1881, lr_0 = 1.4798e-04
Loss = 3.5694e-01, PNorm = 63.0979, GNorm = 2.4133, lr_0 = 1.4788e-04
Loss = 3.3301e-01, PNorm = 63.0978, GNorm = 1.3307, lr_0 = 1.4778e-04
Loss = 3.4066e-01, PNorm = 63.1002, GNorm = 1.4697, lr_0 = 1.4768e-04
Loss = 3.2170e-01, PNorm = 63.1021, GNorm = 1.8378, lr_0 = 1.4758e-04
Loss = 3.6743e-01, PNorm = 63.1026, GNorm = 1.4563, lr_0 = 1.4748e-04
Loss = 3.6219e-01, PNorm = 63.1040, GNorm = 1.2451, lr_0 = 1.4737e-04
Loss = 3.4446e-01, PNorm = 63.1054, GNorm = 1.3760, lr_0 = 1.4727e-04
Loss = 3.3589e-01, PNorm = 63.1044, GNorm = 1.5953, lr_0 = 1.4717e-04
Loss = 3.4961e-01, PNorm = 63.1053, GNorm = 1.3663, lr_0 = 1.4707e-04
Loss = 3.7431e-01, PNorm = 63.1060, GNorm = 1.7801, lr_0 = 1.4697e-04
Loss = 4.0997e-01, PNorm = 63.1061, GNorm = 1.5539, lr_0 = 1.4687e-04
Loss = 3.1698e-01, PNorm = 63.1079, GNorm = 1.1631, lr_0 = 1.4677e-04
Loss = 3.0135e-01, PNorm = 63.1123, GNorm = 1.1300, lr_0 = 1.4667e-04
Loss = 3.1950e-01, PNorm = 63.1159, GNorm = 1.4488, lr_0 = 1.4657e-04
Loss = 3.7309e-01, PNorm = 63.1170, GNorm = 2.1067, lr_0 = 1.4647e-04
Loss = 3.5381e-01, PNorm = 63.1189, GNorm = 1.5452, lr_0 = 1.4637e-04
Loss = 4.2108e-01, PNorm = 63.1193, GNorm = 1.7127, lr_0 = 1.4627e-04
Loss = 3.3404e-01, PNorm = 63.1183, GNorm = 1.9997, lr_0 = 1.4617e-04
Loss = 3.9930e-01, PNorm = 63.1206, GNorm = 1.2277, lr_0 = 1.4607e-04
Loss = 4.2831e-01, PNorm = 63.1264, GNorm = 1.5122, lr_0 = 1.4597e-04
Loss = 3.1088e-01, PNorm = 63.1311, GNorm = 1.8630, lr_0 = 1.4587e-04
Loss = 3.4833e-01, PNorm = 63.1338, GNorm = 1.2670, lr_0 = 1.4577e-04
Loss = 3.3115e-01, PNorm = 63.1353, GNorm = 1.6120, lr_0 = 1.4567e-04
Loss = 3.5024e-01, PNorm = 63.1358, GNorm = 2.7099, lr_0 = 1.4557e-04
Loss = 3.0530e-01, PNorm = 63.1393, GNorm = 1.6738, lr_0 = 1.4547e-04
Loss = 3.3288e-01, PNorm = 63.1411, GNorm = 1.4203, lr_0 = 1.4537e-04
Loss = 3.8678e-01, PNorm = 63.1413, GNorm = 1.8772, lr_0 = 1.4527e-04
Loss = 3.1666e-01, PNorm = 63.1426, GNorm = 1.4622, lr_0 = 1.4517e-04
Loss = 3.4640e-01, PNorm = 63.1426, GNorm = 1.4689, lr_0 = 1.4507e-04
Loss = 3.7695e-01, PNorm = 63.1408, GNorm = 1.8952, lr_0 = 1.4497e-04
Loss = 3.4907e-01, PNorm = 63.1423, GNorm = 1.3737, lr_0 = 1.4487e-04
Loss = 3.2525e-01, PNorm = 63.1451, GNorm = 1.7673, lr_0 = 1.4477e-04
Loss = 3.5351e-01, PNorm = 63.1477, GNorm = 1.4801, lr_0 = 1.4467e-04
Loss = 3.6802e-01, PNorm = 63.1516, GNorm = 1.6507, lr_0 = 1.4457e-04
Loss = 3.3271e-01, PNorm = 63.1511, GNorm = 1.4294, lr_0 = 1.4447e-04
Loss = 3.4573e-01, PNorm = 63.1523, GNorm = 1.0519, lr_0 = 1.4438e-04
Loss = 3.6483e-01, PNorm = 63.1548, GNorm = 2.2972, lr_0 = 1.4428e-04
Loss = 3.3557e-01, PNorm = 63.1569, GNorm = 1.3671, lr_0 = 1.4418e-04
Loss = 3.5599e-01, PNorm = 63.1582, GNorm = 1.8904, lr_0 = 1.4408e-04
Loss = 3.3716e-01, PNorm = 63.1585, GNorm = 1.3598, lr_0 = 1.4398e-04
Loss = 3.1544e-01, PNorm = 63.1604, GNorm = 1.7247, lr_0 = 1.4388e-04
Loss = 3.4854e-01, PNorm = 63.1612, GNorm = 1.6183, lr_0 = 1.4378e-04
Loss = 3.4622e-01, PNorm = 63.1628, GNorm = 1.6998, lr_0 = 1.4368e-04
Loss = 3.6412e-01, PNorm = 63.1655, GNorm = 1.5458, lr_0 = 1.4359e-04
Loss = 3.0156e-01, PNorm = 63.1665, GNorm = 1.2516, lr_0 = 1.4349e-04
Loss = 3.9167e-01, PNorm = 63.1680, GNorm = 1.9438, lr_0 = 1.4339e-04
Loss = 3.1946e-01, PNorm = 63.1677, GNorm = 1.4534, lr_0 = 1.4329e-04
Loss = 3.2106e-01, PNorm = 63.1688, GNorm = 1.9365, lr_0 = 1.4319e-04
Loss = 3.2606e-01, PNorm = 63.1683, GNorm = 1.7421, lr_0 = 1.4310e-04
Loss = 3.3030e-01, PNorm = 63.1708, GNorm = 1.6650, lr_0 = 1.4300e-04
Loss = 3.6997e-01, PNorm = 63.1719, GNorm = 1.5596, lr_0 = 1.4290e-04
Loss = 3.6113e-01, PNorm = 63.1711, GNorm = 1.5941, lr_0 = 1.4280e-04
Loss = 3.8597e-01, PNorm = 63.1726, GNorm = 1.4463, lr_0 = 1.4270e-04
Loss = 3.3805e-01, PNorm = 63.1750, GNorm = 1.0991, lr_0 = 1.4261e-04
Loss = 3.1072e-01, PNorm = 63.1756, GNorm = 1.4455, lr_0 = 1.4251e-04
Loss = 3.1372e-01, PNorm = 63.1782, GNorm = 1.1354, lr_0 = 1.4241e-04
Loss = 3.4900e-01, PNorm = 63.1810, GNorm = 1.6521, lr_0 = 1.4231e-04
Loss = 4.0935e-01, PNorm = 63.1819, GNorm = 1.3680, lr_0 = 1.4222e-04
Loss = 3.2972e-01, PNorm = 63.1826, GNorm = 1.0380, lr_0 = 1.4212e-04
Loss = 3.1962e-01, PNorm = 63.1832, GNorm = 1.3848, lr_0 = 1.4202e-04
Loss = 3.0805e-01, PNorm = 63.1837, GNorm = 1.1187, lr_0 = 1.4192e-04
Loss = 3.4968e-01, PNorm = 63.1854, GNorm = 1.4451, lr_0 = 1.4183e-04
Loss = 3.4914e-01, PNorm = 63.1871, GNorm = 1.1211, lr_0 = 1.4173e-04
Loss = 3.2540e-01, PNorm = 63.1872, GNorm = 1.4930, lr_0 = 1.4163e-04
Loss = 3.0892e-01, PNorm = 63.1874, GNorm = 1.5239, lr_0 = 1.4153e-04
Loss = 3.0389e-01, PNorm = 63.1877, GNorm = 1.4667, lr_0 = 1.4144e-04
Loss = 3.1876e-01, PNorm = 63.1898, GNorm = 1.4518, lr_0 = 1.4134e-04
Loss = 2.6089e-01, PNorm = 63.1916, GNorm = 1.2490, lr_0 = 1.4124e-04
Loss = 3.3416e-01, PNorm = 63.1932, GNorm = 1.2220, lr_0 = 1.4115e-04
Loss = 3.5052e-01, PNorm = 63.1942, GNorm = 1.7009, lr_0 = 1.4105e-04
Loss = 3.7144e-01, PNorm = 63.1954, GNorm = 1.5280, lr_0 = 1.4095e-04
Loss = 3.4778e-01, PNorm = 63.1980, GNorm = 1.6330, lr_0 = 1.4086e-04
Loss = 3.1875e-01, PNorm = 63.1986, GNorm = 1.4203, lr_0 = 1.4076e-04
Loss = 3.9070e-01, PNorm = 63.1982, GNorm = 1.2116, lr_0 = 1.4066e-04
Loss = 3.3435e-01, PNorm = 63.2009, GNorm = 1.5226, lr_0 = 1.4057e-04
Loss = 3.8049e-01, PNorm = 63.2021, GNorm = 1.5915, lr_0 = 1.4047e-04
Loss = 3.6707e-01, PNorm = 63.2027, GNorm = 1.7796, lr_0 = 1.4038e-04
Loss = 3.6929e-01, PNorm = 63.2042, GNorm = 1.4975, lr_0 = 1.4028e-04
Loss = 3.3731e-01, PNorm = 63.2068, GNorm = 1.6112, lr_0 = 1.4018e-04
Loss = 3.7285e-01, PNorm = 63.2090, GNorm = 1.7030, lr_0 = 1.4009e-04
Loss = 3.3752e-01, PNorm = 63.2112, GNorm = 1.2252, lr_0 = 1.3999e-04
Loss = 4.1414e-01, PNorm = 63.2149, GNorm = 1.2439, lr_0 = 1.3990e-04
Loss = 3.5503e-01, PNorm = 63.2157, GNorm = 1.4918, lr_0 = 1.3980e-04
Loss = 3.4936e-01, PNorm = 63.2168, GNorm = 1.7531, lr_0 = 1.3970e-04
Loss = 3.6835e-01, PNorm = 63.2188, GNorm = 1.4030, lr_0 = 1.3961e-04
Loss = 3.8845e-01, PNorm = 63.2168, GNorm = 2.2950, lr_0 = 1.3951e-04
Loss = 3.6744e-01, PNorm = 63.2176, GNorm = 1.4279, lr_0 = 1.3942e-04
Loss = 3.6137e-01, PNorm = 63.2204, GNorm = 1.6101, lr_0 = 1.3932e-04
Loss = 3.1933e-01, PNorm = 63.2220, GNorm = 1.5111, lr_0 = 1.3923e-04
Loss = 3.9108e-01, PNorm = 63.2228, GNorm = 1.4281, lr_0 = 1.3913e-04
Loss = 3.4979e-01, PNorm = 63.2236, GNorm = 1.5813, lr_0 = 1.3904e-04
Loss = 3.6321e-01, PNorm = 63.2266, GNorm = 2.1077, lr_0 = 1.3894e-04
Validation mae = 0.111128
Epoch 26
Loss = 3.7501e-01, PNorm = 63.2272, GNorm = 1.5700, lr_0 = 1.3884e-04
Loss = 4.2892e-01, PNorm = 63.2285, GNorm = 1.4557, lr_0 = 1.3875e-04
Loss = 3.6935e-01, PNorm = 63.2313, GNorm = 1.2182, lr_0 = 1.3865e-04
Loss = 3.8968e-01, PNorm = 63.2339, GNorm = 1.8167, lr_0 = 1.3856e-04
Loss = 3.4259e-01, PNorm = 63.2349, GNorm = 1.4692, lr_0 = 1.3846e-04
Loss = 3.7351e-01, PNorm = 63.2347, GNorm = 2.1715, lr_0 = 1.3837e-04
Loss = 3.3208e-01, PNorm = 63.2368, GNorm = 1.4735, lr_0 = 1.3828e-04
Loss = 3.1556e-01, PNorm = 63.2390, GNorm = 1.9039, lr_0 = 1.3818e-04
Loss = 4.0673e-01, PNorm = 63.2401, GNorm = 1.7876, lr_0 = 1.3809e-04
Loss = 3.1497e-01, PNorm = 63.2441, GNorm = 1.1077, lr_0 = 1.3799e-04
Loss = 3.8402e-01, PNorm = 63.2449, GNorm = 2.4098, lr_0 = 1.3790e-04
Loss = 2.9304e-01, PNorm = 63.2438, GNorm = 1.4981, lr_0 = 1.3780e-04
Loss = 3.8589e-01, PNorm = 63.2441, GNorm = 2.5233, lr_0 = 1.3771e-04
Loss = 3.8020e-01, PNorm = 63.2455, GNorm = 0.9444, lr_0 = 1.3761e-04
Loss = 3.2591e-01, PNorm = 63.2457, GNorm = 1.0183, lr_0 = 1.3752e-04
Loss = 3.5249e-01, PNorm = 63.2469, GNorm = 1.2683, lr_0 = 1.3742e-04
Loss = 3.6287e-01, PNorm = 63.2500, GNorm = 2.0565, lr_0 = 1.3733e-04
Loss = 3.7807e-01, PNorm = 63.2524, GNorm = 1.9280, lr_0 = 1.3724e-04
Loss = 3.3143e-01, PNorm = 63.2537, GNorm = 1.8888, lr_0 = 1.3714e-04
Loss = 3.5270e-01, PNorm = 63.2558, GNorm = 1.4177, lr_0 = 1.3705e-04
Loss = 3.4085e-01, PNorm = 63.2574, GNorm = 1.6106, lr_0 = 1.3695e-04
Loss = 3.2120e-01, PNorm = 63.2580, GNorm = 1.3404, lr_0 = 1.3686e-04
Loss = 3.4671e-01, PNorm = 63.2579, GNorm = 1.3793, lr_0 = 1.3677e-04
Loss = 3.0525e-01, PNorm = 63.2574, GNorm = 1.4736, lr_0 = 1.3667e-04
Loss = 3.1289e-01, PNorm = 63.2593, GNorm = 1.5478, lr_0 = 1.3658e-04
Loss = 3.9458e-01, PNorm = 63.2602, GNorm = 1.1748, lr_0 = 1.3649e-04
Loss = 3.3627e-01, PNorm = 63.2626, GNorm = 1.8288, lr_0 = 1.3639e-04
Loss = 3.6777e-01, PNorm = 63.2640, GNorm = 1.9070, lr_0 = 1.3630e-04
Loss = 3.2056e-01, PNorm = 63.2631, GNorm = 1.2431, lr_0 = 1.3621e-04
Loss = 3.4933e-01, PNorm = 63.2645, GNorm = 1.7602, lr_0 = 1.3611e-04
Loss = 3.5814e-01, PNorm = 63.2685, GNorm = 1.5932, lr_0 = 1.3602e-04
Loss = 3.4419e-01, PNorm = 63.2708, GNorm = 1.4260, lr_0 = 1.3593e-04
Loss = 3.2209e-01, PNorm = 63.2714, GNorm = 1.2196, lr_0 = 1.3583e-04
Loss = 3.6725e-01, PNorm = 63.2735, GNorm = 1.6185, lr_0 = 1.3574e-04
Loss = 2.8715e-01, PNorm = 63.2759, GNorm = 1.1311, lr_0 = 1.3565e-04
Loss = 3.6846e-01, PNorm = 63.2773, GNorm = 1.5050, lr_0 = 1.3555e-04
Loss = 3.1309e-01, PNorm = 63.2792, GNorm = 1.8507, lr_0 = 1.3546e-04
Loss = 3.3073e-01, PNorm = 63.2799, GNorm = 1.8882, lr_0 = 1.3537e-04
Loss = 3.1280e-01, PNorm = 63.2796, GNorm = 1.3906, lr_0 = 1.3528e-04
Loss = 3.2540e-01, PNorm = 63.2799, GNorm = 1.6964, lr_0 = 1.3518e-04
Loss = 3.3853e-01, PNorm = 63.2821, GNorm = 2.1132, lr_0 = 1.3509e-04
Loss = 3.3665e-01, PNorm = 63.2842, GNorm = 1.3812, lr_0 = 1.3500e-04
Loss = 3.4278e-01, PNorm = 63.2855, GNorm = 1.6163, lr_0 = 1.3491e-04
Loss = 3.2247e-01, PNorm = 63.2874, GNorm = 1.5913, lr_0 = 1.3481e-04
Loss = 3.4208e-01, PNorm = 63.2877, GNorm = 1.0843, lr_0 = 1.3472e-04
Loss = 3.6713e-01, PNorm = 63.2870, GNorm = 1.4418, lr_0 = 1.3463e-04
Loss = 3.1321e-01, PNorm = 63.2879, GNorm = 1.1081, lr_0 = 1.3454e-04
Loss = 2.9826e-01, PNorm = 63.2883, GNorm = 1.2202, lr_0 = 1.3444e-04
Loss = 3.7455e-01, PNorm = 63.2876, GNorm = 1.5047, lr_0 = 1.3435e-04
Loss = 3.1778e-01, PNorm = 63.2877, GNorm = 1.2998, lr_0 = 1.3426e-04
Loss = 3.5855e-01, PNorm = 63.2890, GNorm = 1.1139, lr_0 = 1.3417e-04
Loss = 3.3677e-01, PNorm = 63.2911, GNorm = 1.0118, lr_0 = 1.3408e-04
Loss = 3.3840e-01, PNorm = 63.2922, GNorm = 0.9815, lr_0 = 1.3398e-04
Loss = 3.2311e-01, PNorm = 63.2935, GNorm = 1.2495, lr_0 = 1.3389e-04
Loss = 3.3041e-01, PNorm = 63.2942, GNorm = 1.2675, lr_0 = 1.3380e-04
Loss = 3.6169e-01, PNorm = 63.2950, GNorm = 1.6195, lr_0 = 1.3371e-04
Loss = 3.1576e-01, PNorm = 63.2963, GNorm = 1.0473, lr_0 = 1.3362e-04
Loss = 3.3029e-01, PNorm = 63.3000, GNorm = 1.2901, lr_0 = 1.3353e-04
Loss = 3.4644e-01, PNorm = 63.3010, GNorm = 1.5483, lr_0 = 1.3343e-04
Loss = 3.5142e-01, PNorm = 63.3008, GNorm = 1.2506, lr_0 = 1.3334e-04
Loss = 3.6755e-01, PNorm = 63.3027, GNorm = 1.4118, lr_0 = 1.3325e-04
Loss = 3.5777e-01, PNorm = 63.3028, GNorm = 1.5114, lr_0 = 1.3316e-04
Loss = 3.5870e-01, PNorm = 63.3027, GNorm = 1.6390, lr_0 = 1.3307e-04
Loss = 3.6921e-01, PNorm = 63.3039, GNorm = 1.3762, lr_0 = 1.3298e-04
Loss = 3.4686e-01, PNorm = 63.3060, GNorm = 1.2690, lr_0 = 1.3289e-04
Loss = 3.1040e-01, PNorm = 63.3080, GNorm = 1.0810, lr_0 = 1.3280e-04
Loss = 4.0543e-01, PNorm = 63.3069, GNorm = 1.4448, lr_0 = 1.3270e-04
Loss = 3.4344e-01, PNorm = 63.3072, GNorm = 1.7479, lr_0 = 1.3261e-04
Loss = 3.8184e-01, PNorm = 63.3091, GNorm = 1.6844, lr_0 = 1.3252e-04
Loss = 3.2605e-01, PNorm = 63.3091, GNorm = 1.1898, lr_0 = 1.3243e-04
Loss = 3.4781e-01, PNorm = 63.3100, GNorm = 1.1795, lr_0 = 1.3234e-04
Loss = 3.1742e-01, PNorm = 63.3110, GNorm = 1.9016, lr_0 = 1.3225e-04
Loss = 3.1847e-01, PNorm = 63.3098, GNorm = 1.4399, lr_0 = 1.3216e-04
Loss = 3.1999e-01, PNorm = 63.3101, GNorm = 1.3524, lr_0 = 1.3207e-04
Loss = 3.5525e-01, PNorm = 63.3116, GNorm = 1.2368, lr_0 = 1.3198e-04
Loss = 4.1582e-01, PNorm = 63.3144, GNorm = 1.7707, lr_0 = 1.3189e-04
Loss = 3.3367e-01, PNorm = 63.3160, GNorm = 1.4879, lr_0 = 1.3180e-04
Loss = 3.5315e-01, PNorm = 63.3169, GNorm = 1.7139, lr_0 = 1.3171e-04
Loss = 3.9102e-01, PNorm = 63.3187, GNorm = 1.9115, lr_0 = 1.3162e-04
Loss = 3.6403e-01, PNorm = 63.3210, GNorm = 1.2331, lr_0 = 1.3153e-04
Loss = 3.2292e-01, PNorm = 63.3217, GNorm = 1.4941, lr_0 = 1.3144e-04
Loss = 4.0885e-01, PNorm = 63.3235, GNorm = 1.9926, lr_0 = 1.3135e-04
Loss = 3.6153e-01, PNorm = 63.3256, GNorm = 1.2604, lr_0 = 1.3126e-04
Loss = 3.5012e-01, PNorm = 63.3266, GNorm = 1.4368, lr_0 = 1.3117e-04
Loss = 3.4812e-01, PNorm = 63.3274, GNorm = 1.6498, lr_0 = 1.3108e-04
Loss = 3.6308e-01, PNorm = 63.3283, GNorm = 1.6077, lr_0 = 1.3099e-04
Loss = 3.3478e-01, PNorm = 63.3303, GNorm = 0.9479, lr_0 = 1.3090e-04
Loss = 3.5359e-01, PNorm = 63.3309, GNorm = 1.6889, lr_0 = 1.3081e-04
Loss = 3.4604e-01, PNorm = 63.3330, GNorm = 1.1700, lr_0 = 1.3072e-04
Loss = 3.5691e-01, PNorm = 63.3338, GNorm = 1.3339, lr_0 = 1.3063e-04
Loss = 3.2881e-01, PNorm = 63.3360, GNorm = 1.1310, lr_0 = 1.3054e-04
Loss = 3.3433e-01, PNorm = 63.3377, GNorm = 1.1466, lr_0 = 1.3045e-04
Loss = 3.3171e-01, PNorm = 63.3396, GNorm = 1.8016, lr_0 = 1.3036e-04
Loss = 3.4318e-01, PNorm = 63.3414, GNorm = 2.0416, lr_0 = 1.3027e-04
Loss = 3.9072e-01, PNorm = 63.3426, GNorm = 1.4634, lr_0 = 1.3018e-04
Loss = 3.8040e-01, PNorm = 63.3454, GNorm = 1.7146, lr_0 = 1.3009e-04
Loss = 3.5753e-01, PNorm = 63.3474, GNorm = 1.2051, lr_0 = 1.3000e-04
Loss = 3.4735e-01, PNorm = 63.3493, GNorm = 1.3730, lr_0 = 1.2992e-04
Loss = 3.6594e-01, PNorm = 63.3524, GNorm = 1.2316, lr_0 = 1.2983e-04
Loss = 3.3505e-01, PNorm = 63.3560, GNorm = 1.1544, lr_0 = 1.2974e-04
Loss = 3.8011e-01, PNorm = 63.3552, GNorm = 1.3641, lr_0 = 1.2965e-04
Loss = 3.5692e-01, PNorm = 63.3566, GNorm = 1.1833, lr_0 = 1.2956e-04
Loss = 3.8058e-01, PNorm = 63.3575, GNorm = 1.5864, lr_0 = 1.2947e-04
Loss = 3.4762e-01, PNorm = 63.3569, GNorm = 2.1239, lr_0 = 1.2938e-04
Loss = 3.5191e-01, PNorm = 63.3559, GNorm = 1.3038, lr_0 = 1.2929e-04
Loss = 3.5486e-01, PNorm = 63.3551, GNorm = 1.8351, lr_0 = 1.2921e-04
Loss = 4.0253e-01, PNorm = 63.3566, GNorm = 2.3502, lr_0 = 1.2912e-04
Loss = 3.2237e-01, PNorm = 63.3602, GNorm = 1.2511, lr_0 = 1.2903e-04
Loss = 3.2621e-01, PNorm = 63.3610, GNorm = 1.1492, lr_0 = 1.2894e-04
Loss = 3.4560e-01, PNorm = 63.3604, GNorm = 1.4937, lr_0 = 1.2885e-04
Loss = 3.5620e-01, PNorm = 63.3606, GNorm = 1.1782, lr_0 = 1.2876e-04
Loss = 3.3331e-01, PNorm = 63.3614, GNorm = 1.4784, lr_0 = 1.2867e-04
Loss = 3.2261e-01, PNorm = 63.3611, GNorm = 1.8433, lr_0 = 1.2859e-04
Loss = 3.1275e-01, PNorm = 63.3608, GNorm = 1.6960, lr_0 = 1.2850e-04
Loss = 3.1684e-01, PNorm = 63.3620, GNorm = 1.1853, lr_0 = 1.2841e-04
Loss = 3.5044e-01, PNorm = 63.3641, GNorm = 1.4987, lr_0 = 1.2832e-04
Loss = 3.8000e-01, PNorm = 63.3638, GNorm = 1.6487, lr_0 = 1.2823e-04
Loss = 3.5062e-01, PNorm = 63.3628, GNorm = 1.2987, lr_0 = 1.2815e-04
Loss = 3.3046e-01, PNorm = 63.3631, GNorm = 1.8097, lr_0 = 1.2806e-04
Loss = 3.3893e-01, PNorm = 63.3659, GNorm = 2.3471, lr_0 = 1.2797e-04
Validation mae = 0.111271
Epoch 27
Loss = 2.8972e-01, PNorm = 63.3669, GNorm = 1.3428, lr_0 = 1.2788e-04
Loss = 3.2941e-01, PNorm = 63.3672, GNorm = 1.7240, lr_0 = 1.2780e-04
Loss = 3.2445e-01, PNorm = 63.3693, GNorm = 1.3103, lr_0 = 1.2771e-04
Loss = 3.5386e-01, PNorm = 63.3710, GNorm = 1.3852, lr_0 = 1.2762e-04
Loss = 3.9548e-01, PNorm = 63.3714, GNorm = 1.2925, lr_0 = 1.2753e-04
Loss = 3.5655e-01, PNorm = 63.3732, GNorm = 1.8749, lr_0 = 1.2745e-04
Loss = 3.4209e-01, PNorm = 63.3742, GNorm = 1.4424, lr_0 = 1.2736e-04
Loss = 3.5146e-01, PNorm = 63.3755, GNorm = 1.3137, lr_0 = 1.2727e-04
Loss = 3.3873e-01, PNorm = 63.3773, GNorm = 1.4611, lr_0 = 1.2718e-04
Loss = 3.5671e-01, PNorm = 63.3788, GNorm = 1.6000, lr_0 = 1.2710e-04
Loss = 3.2873e-01, PNorm = 63.3798, GNorm = 1.4877, lr_0 = 1.2701e-04
Loss = 3.4024e-01, PNorm = 63.3792, GNorm = 1.8117, lr_0 = 1.2692e-04
Loss = 3.0505e-01, PNorm = 63.3791, GNorm = 1.4850, lr_0 = 1.2684e-04
Loss = 3.2829e-01, PNorm = 63.3802, GNorm = 1.4985, lr_0 = 1.2675e-04
Loss = 3.5027e-01, PNorm = 63.3814, GNorm = 1.3240, lr_0 = 1.2666e-04
Loss = 3.7835e-01, PNorm = 63.3834, GNorm = 1.5210, lr_0 = 1.2658e-04
Loss = 4.0264e-01, PNorm = 63.3854, GNorm = 1.4354, lr_0 = 1.2649e-04
Loss = 3.3845e-01, PNorm = 63.3875, GNorm = 1.8361, lr_0 = 1.2640e-04
Loss = 3.3143e-01, PNorm = 63.3899, GNorm = 1.0493, lr_0 = 1.2632e-04
Loss = 3.5461e-01, PNorm = 63.3906, GNorm = 1.0300, lr_0 = 1.2623e-04
Loss = 3.7527e-01, PNorm = 63.3911, GNorm = 1.5529, lr_0 = 1.2614e-04
Loss = 3.7664e-01, PNorm = 63.3917, GNorm = 1.7861, lr_0 = 1.2606e-04
Loss = 3.4533e-01, PNorm = 63.3942, GNorm = 1.3509, lr_0 = 1.2597e-04
Loss = 3.8790e-01, PNorm = 63.3953, GNorm = 1.4418, lr_0 = 1.2588e-04
Loss = 3.3207e-01, PNorm = 63.3954, GNorm = 1.7385, lr_0 = 1.2580e-04
Loss = 3.9488e-01, PNorm = 63.3963, GNorm = 1.6656, lr_0 = 1.2571e-04
Loss = 3.5688e-01, PNorm = 63.3989, GNorm = 1.6365, lr_0 = 1.2563e-04
Loss = 3.4959e-01, PNorm = 63.4020, GNorm = 1.7074, lr_0 = 1.2554e-04
Loss = 3.4807e-01, PNorm = 63.4018, GNorm = 1.4764, lr_0 = 1.2545e-04
Loss = 3.2017e-01, PNorm = 63.4017, GNorm = 1.4206, lr_0 = 1.2537e-04
Loss = 3.6199e-01, PNorm = 63.4032, GNorm = 1.3445, lr_0 = 1.2528e-04
Loss = 3.6902e-01, PNorm = 63.4044, GNorm = 1.8324, lr_0 = 1.2520e-04
Loss = 3.7584e-01, PNorm = 63.4050, GNorm = 1.5282, lr_0 = 1.2511e-04
Loss = 3.4291e-01, PNorm = 63.4054, GNorm = 1.0510, lr_0 = 1.2502e-04
Loss = 3.3876e-01, PNorm = 63.4062, GNorm = 2.0743, lr_0 = 1.2494e-04
Loss = 4.0056e-01, PNorm = 63.4088, GNorm = 1.6962, lr_0 = 1.2485e-04
Loss = 3.0008e-01, PNorm = 63.4117, GNorm = 1.0839, lr_0 = 1.2477e-04
Loss = 3.2853e-01, PNorm = 63.4139, GNorm = 1.0247, lr_0 = 1.2468e-04
Loss = 3.1695e-01, PNorm = 63.4146, GNorm = 1.6121, lr_0 = 1.2460e-04
Loss = 3.3294e-01, PNorm = 63.4156, GNorm = 1.5205, lr_0 = 1.2451e-04
Loss = 3.4497e-01, PNorm = 63.4151, GNorm = 1.4534, lr_0 = 1.2443e-04
Loss = 3.0837e-01, PNorm = 63.4148, GNorm = 1.1082, lr_0 = 1.2434e-04
Loss = 2.9462e-01, PNorm = 63.4153, GNorm = 1.3601, lr_0 = 1.2426e-04
Loss = 3.4276e-01, PNorm = 63.4171, GNorm = 1.4167, lr_0 = 1.2417e-04
Loss = 2.7656e-01, PNorm = 63.4173, GNorm = 1.3368, lr_0 = 1.2409e-04
Loss = 3.0433e-01, PNorm = 63.4190, GNorm = 1.2952, lr_0 = 1.2400e-04
Loss = 3.0543e-01, PNorm = 63.4216, GNorm = 1.0517, lr_0 = 1.2392e-04
Loss = 3.5394e-01, PNorm = 63.4220, GNorm = 1.4406, lr_0 = 1.2383e-04
Loss = 3.6745e-01, PNorm = 63.4226, GNorm = 1.6933, lr_0 = 1.2375e-04
Loss = 3.8695e-01, PNorm = 63.4250, GNorm = 1.9364, lr_0 = 1.2366e-04
Loss = 3.4421e-01, PNorm = 63.4267, GNorm = 1.0396, lr_0 = 1.2358e-04
Loss = 4.0544e-01, PNorm = 63.4260, GNorm = 1.4154, lr_0 = 1.2349e-04
Loss = 3.4073e-01, PNorm = 63.4260, GNorm = 1.6149, lr_0 = 1.2341e-04
Loss = 3.7732e-01, PNorm = 63.4274, GNorm = 1.3788, lr_0 = 1.2332e-04
Loss = 2.9523e-01, PNorm = 63.4287, GNorm = 1.3202, lr_0 = 1.2324e-04
Loss = 3.1243e-01, PNorm = 63.4311, GNorm = 1.3474, lr_0 = 1.2315e-04
Loss = 3.4974e-01, PNorm = 63.4328, GNorm = 2.9679, lr_0 = 1.2307e-04
Loss = 3.2783e-01, PNorm = 63.4338, GNorm = 1.0618, lr_0 = 1.2298e-04
Loss = 3.2603e-01, PNorm = 63.4351, GNorm = 1.5406, lr_0 = 1.2290e-04
Loss = 3.6218e-01, PNorm = 63.4376, GNorm = 1.3264, lr_0 = 1.2282e-04
Loss = 3.7236e-01, PNorm = 63.4373, GNorm = 1.6905, lr_0 = 1.2273e-04
Loss = 3.5582e-01, PNorm = 63.4383, GNorm = 1.5267, lr_0 = 1.2265e-04
Loss = 3.2422e-01, PNorm = 63.4396, GNorm = 1.4416, lr_0 = 1.2256e-04
Loss = 3.4092e-01, PNorm = 63.4390, GNorm = 1.2382, lr_0 = 1.2248e-04
Loss = 2.8751e-01, PNorm = 63.4382, GNorm = 1.7114, lr_0 = 1.2240e-04
Loss = 3.2560e-01, PNorm = 63.4384, GNorm = 1.1686, lr_0 = 1.2231e-04
Loss = 3.4949e-01, PNorm = 63.4379, GNorm = 1.6147, lr_0 = 1.2223e-04
Loss = 3.4855e-01, PNorm = 63.4388, GNorm = 1.1781, lr_0 = 1.2214e-04
Loss = 3.4636e-01, PNorm = 63.4417, GNorm = 1.1788, lr_0 = 1.2206e-04
Loss = 3.1127e-01, PNorm = 63.4447, GNorm = 1.4392, lr_0 = 1.2198e-04
Loss = 3.8941e-01, PNorm = 63.4451, GNorm = 1.5831, lr_0 = 1.2189e-04
Loss = 3.3581e-01, PNorm = 63.4452, GNorm = 1.4567, lr_0 = 1.2181e-04
Loss = 3.9258e-01, PNorm = 63.4455, GNorm = 1.3914, lr_0 = 1.2173e-04
Loss = 3.4399e-01, PNorm = 63.4477, GNorm = 2.3023, lr_0 = 1.2164e-04
Loss = 3.5294e-01, PNorm = 63.4486, GNorm = 1.6523, lr_0 = 1.2156e-04
Loss = 3.1890e-01, PNorm = 63.4494, GNorm = 1.5880, lr_0 = 1.2148e-04
Loss = 3.4968e-01, PNorm = 63.4499, GNorm = 1.3138, lr_0 = 1.2139e-04
Loss = 3.5709e-01, PNorm = 63.4508, GNorm = 1.6459, lr_0 = 1.2131e-04
Loss = 3.6352e-01, PNorm = 63.4521, GNorm = 1.5589, lr_0 = 1.2123e-04
Loss = 3.2605e-01, PNorm = 63.4531, GNorm = 1.3544, lr_0 = 1.2114e-04
Loss = 3.3332e-01, PNorm = 63.4538, GNorm = 1.3638, lr_0 = 1.2106e-04
Loss = 3.4792e-01, PNorm = 63.4569, GNorm = 1.7418, lr_0 = 1.2098e-04
Loss = 3.0881e-01, PNorm = 63.4590, GNorm = 1.0121, lr_0 = 1.2090e-04
Loss = 3.1032e-01, PNorm = 63.4607, GNorm = 1.1711, lr_0 = 1.2081e-04
Loss = 3.5739e-01, PNorm = 63.4626, GNorm = 1.5474, lr_0 = 1.2073e-04
Loss = 3.1221e-01, PNorm = 63.4645, GNorm = 1.3903, lr_0 = 1.2065e-04
Loss = 3.3287e-01, PNorm = 63.4653, GNorm = 1.4197, lr_0 = 1.2056e-04
Loss = 3.6331e-01, PNorm = 63.4659, GNorm = 2.1395, lr_0 = 1.2048e-04
Loss = 4.1420e-01, PNorm = 63.4677, GNorm = 1.7536, lr_0 = 1.2040e-04
Loss = 3.5274e-01, PNorm = 63.4696, GNorm = 1.6343, lr_0 = 1.2032e-04
Loss = 3.2748e-01, PNorm = 63.4710, GNorm = 0.9717, lr_0 = 1.2023e-04
Loss = 3.1182e-01, PNorm = 63.4717, GNorm = 1.3909, lr_0 = 1.2015e-04
Loss = 3.7620e-01, PNorm = 63.4732, GNorm = 1.1661, lr_0 = 1.2007e-04
Loss = 3.4870e-01, PNorm = 63.4758, GNorm = 1.5269, lr_0 = 1.1999e-04
Loss = 3.3278e-01, PNorm = 63.4786, GNorm = 1.5167, lr_0 = 1.1991e-04
Loss = 3.5540e-01, PNorm = 63.4805, GNorm = 1.6143, lr_0 = 1.1982e-04
Loss = 3.6556e-01, PNorm = 63.4832, GNorm = 1.7523, lr_0 = 1.1974e-04
Loss = 3.7826e-01, PNorm = 63.4835, GNorm = 1.4605, lr_0 = 1.1966e-04
Loss = 3.2171e-01, PNorm = 63.4834, GNorm = 2.2463, lr_0 = 1.1958e-04
Loss = 3.5372e-01, PNorm = 63.4842, GNorm = 1.4305, lr_0 = 1.1950e-04
Loss = 3.2902e-01, PNorm = 63.4857, GNorm = 1.5692, lr_0 = 1.1941e-04
Loss = 3.6113e-01, PNorm = 63.4872, GNorm = 1.6012, lr_0 = 1.1933e-04
Loss = 3.6053e-01, PNorm = 63.4881, GNorm = 1.5765, lr_0 = 1.1925e-04
Loss = 3.2831e-01, PNorm = 63.4888, GNorm = 1.3630, lr_0 = 1.1917e-04
Loss = 4.0283e-01, PNorm = 63.4892, GNorm = 1.2499, lr_0 = 1.1909e-04
Loss = 3.9769e-01, PNorm = 63.4890, GNorm = 1.8991, lr_0 = 1.1901e-04
Loss = 3.4787e-01, PNorm = 63.4914, GNorm = 1.4058, lr_0 = 1.1892e-04
Loss = 3.5708e-01, PNorm = 63.4937, GNorm = 2.4093, lr_0 = 1.1884e-04
Loss = 2.7039e-01, PNorm = 63.4956, GNorm = 1.2641, lr_0 = 1.1876e-04
Loss = 3.2350e-01, PNorm = 63.4976, GNorm = 1.3221, lr_0 = 1.1868e-04
Loss = 3.7205e-01, PNorm = 63.4985, GNorm = 1.9472, lr_0 = 1.1860e-04
Loss = 3.7801e-01, PNorm = 63.4982, GNorm = 2.5453, lr_0 = 1.1852e-04
Loss = 3.6033e-01, PNorm = 63.4973, GNorm = 1.4685, lr_0 = 1.1844e-04
Loss = 3.5363e-01, PNorm = 63.4970, GNorm = 1.7382, lr_0 = 1.1835e-04
Loss = 3.7737e-01, PNorm = 63.4982, GNorm = 1.6194, lr_0 = 1.1827e-04
Loss = 3.0165e-01, PNorm = 63.5004, GNorm = 1.0667, lr_0 = 1.1819e-04
Loss = 4.0173e-01, PNorm = 63.5015, GNorm = 1.6995, lr_0 = 1.1811e-04
Loss = 3.5260e-01, PNorm = 63.5027, GNorm = 1.6140, lr_0 = 1.1803e-04
Loss = 3.1077e-01, PNorm = 63.5052, GNorm = 1.2819, lr_0 = 1.1795e-04
Loss = 3.3338e-01, PNorm = 63.5061, GNorm = 1.7138, lr_0 = 1.1787e-04
Validation mae = 0.110936
Epoch 28
Loss = 3.5546e-01, PNorm = 63.5069, GNorm = 1.4568, lr_0 = 1.1779e-04
Loss = 3.2982e-01, PNorm = 63.5082, GNorm = 1.6298, lr_0 = 1.1771e-04
Loss = 3.3884e-01, PNorm = 63.5101, GNorm = 1.8315, lr_0 = 1.1763e-04
Loss = 3.4005e-01, PNorm = 63.5113, GNorm = 2.4920, lr_0 = 1.1755e-04
Loss = 3.3467e-01, PNorm = 63.5118, GNorm = 1.8618, lr_0 = 1.1747e-04
Loss = 3.2111e-01, PNorm = 63.5132, GNorm = 1.6286, lr_0 = 1.1739e-04
Loss = 3.6051e-01, PNorm = 63.5138, GNorm = 1.8570, lr_0 = 1.1730e-04
Loss = 3.3882e-01, PNorm = 63.5144, GNorm = 1.7723, lr_0 = 1.1722e-04
Loss = 3.4833e-01, PNorm = 63.5146, GNorm = 1.0898, lr_0 = 1.1714e-04
Loss = 3.5423e-01, PNorm = 63.5160, GNorm = 1.3048, lr_0 = 1.1706e-04
Loss = 3.2571e-01, PNorm = 63.5161, GNorm = 1.0532, lr_0 = 1.1698e-04
Loss = 3.5823e-01, PNorm = 63.5164, GNorm = 1.6447, lr_0 = 1.1690e-04
Loss = 3.5617e-01, PNorm = 63.5180, GNorm = 1.6080, lr_0 = 1.1682e-04
Loss = 3.1356e-01, PNorm = 63.5168, GNorm = 1.5222, lr_0 = 1.1674e-04
Loss = 3.5158e-01, PNorm = 63.5169, GNorm = 1.9523, lr_0 = 1.1666e-04
Loss = 3.8849e-01, PNorm = 63.5170, GNorm = 1.6808, lr_0 = 1.1658e-04
Loss = 3.3686e-01, PNorm = 63.5179, GNorm = 1.5908, lr_0 = 1.1650e-04
Loss = 3.3127e-01, PNorm = 63.5185, GNorm = 1.4102, lr_0 = 1.1642e-04
Loss = 3.7385e-01, PNorm = 63.5198, GNorm = 1.6346, lr_0 = 1.1634e-04
Loss = 3.1969e-01, PNorm = 63.5216, GNorm = 1.3015, lr_0 = 1.1626e-04
Loss = 3.3837e-01, PNorm = 63.5217, GNorm = 1.4966, lr_0 = 1.1618e-04
Loss = 3.5225e-01, PNorm = 63.5222, GNorm = 1.1088, lr_0 = 1.1611e-04
Loss = 3.7014e-01, PNorm = 63.5230, GNorm = 1.2486, lr_0 = 1.1603e-04
Loss = 3.5281e-01, PNorm = 63.5223, GNorm = 1.3910, lr_0 = 1.1595e-04
Loss = 3.5815e-01, PNorm = 63.5242, GNorm = 1.9481, lr_0 = 1.1587e-04
Loss = 3.8779e-01, PNorm = 63.5265, GNorm = 2.3433, lr_0 = 1.1579e-04
Loss = 3.3296e-01, PNorm = 63.5280, GNorm = 1.0091, lr_0 = 1.1571e-04
Loss = 3.6361e-01, PNorm = 63.5279, GNorm = 1.6672, lr_0 = 1.1563e-04
Loss = 3.6900e-01, PNorm = 63.5275, GNorm = 2.3886, lr_0 = 1.1555e-04
Loss = 3.5472e-01, PNorm = 63.5281, GNorm = 1.7168, lr_0 = 1.1547e-04
Loss = 3.6658e-01, PNorm = 63.5282, GNorm = 1.5662, lr_0 = 1.1539e-04
Loss = 3.4563e-01, PNorm = 63.5301, GNorm = 1.3700, lr_0 = 1.1531e-04
Loss = 3.5676e-01, PNorm = 63.5293, GNorm = 1.3508, lr_0 = 1.1523e-04
Loss = 2.9352e-01, PNorm = 63.5277, GNorm = 1.0139, lr_0 = 1.1515e-04
Loss = 3.5996e-01, PNorm = 63.5281, GNorm = 1.9851, lr_0 = 1.1508e-04
Loss = 3.7408e-01, PNorm = 63.5295, GNorm = 1.4561, lr_0 = 1.1500e-04
Loss = 3.3597e-01, PNorm = 63.5300, GNorm = 1.7910, lr_0 = 1.1492e-04
Loss = 3.1894e-01, PNorm = 63.5312, GNorm = 1.3272, lr_0 = 1.1484e-04
Loss = 3.3884e-01, PNorm = 63.5310, GNorm = 2.0433, lr_0 = 1.1476e-04
Loss = 3.6948e-01, PNorm = 63.5326, GNorm = 1.4009, lr_0 = 1.1468e-04
Loss = 3.4874e-01, PNorm = 63.5360, GNorm = 1.3767, lr_0 = 1.1460e-04
Loss = 3.3139e-01, PNorm = 63.5365, GNorm = 1.1359, lr_0 = 1.1452e-04
Loss = 3.2649e-01, PNorm = 63.5374, GNorm = 1.4075, lr_0 = 1.1445e-04
Loss = 3.5300e-01, PNorm = 63.5392, GNorm = 1.3060, lr_0 = 1.1437e-04
Loss = 3.7869e-01, PNorm = 63.5386, GNorm = 1.5514, lr_0 = 1.1429e-04
Loss = 3.0456e-01, PNorm = 63.5397, GNorm = 1.1137, lr_0 = 1.1421e-04
Loss = 3.5216e-01, PNorm = 63.5413, GNorm = 1.5049, lr_0 = 1.1413e-04
Loss = 3.6033e-01, PNorm = 63.5412, GNorm = 1.3456, lr_0 = 1.1405e-04
Loss = 3.5163e-01, PNorm = 63.5434, GNorm = 1.3818, lr_0 = 1.1398e-04
Loss = 3.4229e-01, PNorm = 63.5459, GNorm = 1.4029, lr_0 = 1.1390e-04
Loss = 3.7434e-01, PNorm = 63.5461, GNorm = 1.3900, lr_0 = 1.1382e-04
Loss = 3.5972e-01, PNorm = 63.5476, GNorm = 1.3103, lr_0 = 1.1374e-04
Loss = 3.0175e-01, PNorm = 63.5496, GNorm = 1.0900, lr_0 = 1.1366e-04
Loss = 3.8009e-01, PNorm = 63.5515, GNorm = 1.4597, lr_0 = 1.1359e-04
Loss = 3.2595e-01, PNorm = 63.5528, GNorm = 1.3774, lr_0 = 1.1351e-04
Loss = 3.4657e-01, PNorm = 63.5545, GNorm = 1.5067, lr_0 = 1.1343e-04
Loss = 3.6137e-01, PNorm = 63.5545, GNorm = 1.8681, lr_0 = 1.1335e-04
Loss = 3.9095e-01, PNorm = 63.5517, GNorm = 1.2686, lr_0 = 1.1328e-04
Loss = 3.3935e-01, PNorm = 63.5502, GNorm = 1.6277, lr_0 = 1.1320e-04
Loss = 3.5543e-01, PNorm = 63.5504, GNorm = 1.6194, lr_0 = 1.1312e-04
Loss = 3.2541e-01, PNorm = 63.5515, GNorm = 1.7330, lr_0 = 1.1304e-04
Loss = 3.5802e-01, PNorm = 63.5523, GNorm = 1.6310, lr_0 = 1.1297e-04
Loss = 3.4002e-01, PNorm = 63.5535, GNorm = 1.4584, lr_0 = 1.1289e-04
Loss = 3.4865e-01, PNorm = 63.5559, GNorm = 1.4058, lr_0 = 1.1281e-04
Loss = 3.9197e-01, PNorm = 63.5588, GNorm = 2.2460, lr_0 = 1.1273e-04
Loss = 4.0458e-01, PNorm = 63.5579, GNorm = 1.7927, lr_0 = 1.1266e-04
Loss = 3.1726e-01, PNorm = 63.5579, GNorm = 1.1652, lr_0 = 1.1258e-04
Loss = 3.3937e-01, PNorm = 63.5592, GNorm = 1.8501, lr_0 = 1.1250e-04
Loss = 3.2245e-01, PNorm = 63.5612, GNorm = 1.6404, lr_0 = 1.1243e-04
Loss = 3.5634e-01, PNorm = 63.5618, GNorm = 1.3270, lr_0 = 1.1235e-04
Loss = 3.3899e-01, PNorm = 63.5623, GNorm = 1.4855, lr_0 = 1.1227e-04
Loss = 3.7588e-01, PNorm = 63.5641, GNorm = 2.0316, lr_0 = 1.1219e-04
Loss = 3.5061e-01, PNorm = 63.5659, GNorm = 1.2794, lr_0 = 1.1212e-04
Loss = 3.0288e-01, PNorm = 63.5665, GNorm = 1.3412, lr_0 = 1.1204e-04
Loss = 3.3437e-01, PNorm = 63.5666, GNorm = 1.3804, lr_0 = 1.1196e-04
Loss = 2.9970e-01, PNorm = 63.5677, GNorm = 1.2710, lr_0 = 1.1189e-04
Loss = 3.4068e-01, PNorm = 63.5687, GNorm = 1.4318, lr_0 = 1.1181e-04
Loss = 3.0438e-01, PNorm = 63.5689, GNorm = 1.3891, lr_0 = 1.1173e-04
Loss = 3.0942e-01, PNorm = 63.5703, GNorm = 1.2084, lr_0 = 1.1166e-04
Loss = 3.3864e-01, PNorm = 63.5710, GNorm = 1.2757, lr_0 = 1.1158e-04
Loss = 3.1786e-01, PNorm = 63.5744, GNorm = 1.3961, lr_0 = 1.1150e-04
Loss = 3.4137e-01, PNorm = 63.5758, GNorm = 1.1432, lr_0 = 1.1143e-04
Loss = 3.3142e-01, PNorm = 63.5763, GNorm = 2.3504, lr_0 = 1.1135e-04
Loss = 3.2581e-01, PNorm = 63.5774, GNorm = 1.3582, lr_0 = 1.1128e-04
Loss = 3.4748e-01, PNorm = 63.5787, GNorm = 1.4776, lr_0 = 1.1120e-04
Loss = 3.4039e-01, PNorm = 63.5787, GNorm = 1.4759, lr_0 = 1.1112e-04
Loss = 3.2922e-01, PNorm = 63.5783, GNorm = 1.3434, lr_0 = 1.1105e-04
Loss = 3.6303e-01, PNorm = 63.5787, GNorm = 1.2780, lr_0 = 1.1097e-04
Loss = 3.9487e-01, PNorm = 63.5805, GNorm = 1.4529, lr_0 = 1.1089e-04
Loss = 3.7667e-01, PNorm = 63.5816, GNorm = 2.0054, lr_0 = 1.1082e-04
Loss = 3.2074e-01, PNorm = 63.5806, GNorm = 1.4663, lr_0 = 1.1074e-04
Loss = 3.7960e-01, PNorm = 63.5806, GNorm = 1.3487, lr_0 = 1.1067e-04
Loss = 3.5713e-01, PNorm = 63.5817, GNorm = 1.1835, lr_0 = 1.1059e-04
Loss = 3.7949e-01, PNorm = 63.5824, GNorm = 1.4955, lr_0 = 1.1052e-04
Loss = 4.5721e-01, PNorm = 63.5832, GNorm = 1.3355, lr_0 = 1.1044e-04
Loss = 3.5598e-01, PNorm = 63.5852, GNorm = 1.5626, lr_0 = 1.1036e-04
Loss = 3.4608e-01, PNorm = 63.5862, GNorm = 1.1039, lr_0 = 1.1029e-04
Loss = 3.4899e-01, PNorm = 63.5860, GNorm = 2.1006, lr_0 = 1.1021e-04
Loss = 3.6297e-01, PNorm = 63.5863, GNorm = 1.2029, lr_0 = 1.1014e-04
Loss = 3.3953e-01, PNorm = 63.5879, GNorm = 1.1388, lr_0 = 1.1006e-04
Loss = 3.3087e-01, PNorm = 63.5894, GNorm = 1.4124, lr_0 = 1.0999e-04
Loss = 3.1473e-01, PNorm = 63.5901, GNorm = 1.4412, lr_0 = 1.0991e-04
Loss = 3.5681e-01, PNorm = 63.5892, GNorm = 1.5110, lr_0 = 1.0984e-04
Loss = 3.8900e-01, PNorm = 63.5905, GNorm = 2.5190, lr_0 = 1.0976e-04
Loss = 2.6372e-01, PNorm = 63.5937, GNorm = 1.6819, lr_0 = 1.0969e-04
Loss = 3.3663e-01, PNorm = 63.5952, GNorm = 1.3009, lr_0 = 1.0961e-04
Loss = 3.3432e-01, PNorm = 63.5956, GNorm = 1.1334, lr_0 = 1.0954e-04
Loss = 3.1989e-01, PNorm = 63.5965, GNorm = 1.5914, lr_0 = 1.0946e-04
Loss = 3.6378e-01, PNorm = 63.5974, GNorm = 1.2102, lr_0 = 1.0939e-04
Loss = 3.3625e-01, PNorm = 63.5986, GNorm = 1.2468, lr_0 = 1.0931e-04
Loss = 3.8347e-01, PNorm = 63.6004, GNorm = 1.8320, lr_0 = 1.0924e-04
Loss = 3.1973e-01, PNorm = 63.6011, GNorm = 1.9203, lr_0 = 1.0916e-04
Loss = 3.0143e-01, PNorm = 63.6022, GNorm = 1.5267, lr_0 = 1.0909e-04
Loss = 3.5472e-01, PNorm = 63.6043, GNorm = 1.0285, lr_0 = 1.0901e-04
Loss = 3.4461e-01, PNorm = 63.6059, GNorm = 1.2829, lr_0 = 1.0894e-04
Loss = 3.3280e-01, PNorm = 63.6076, GNorm = 1.3563, lr_0 = 1.0886e-04
Loss = 3.3699e-01, PNorm = 63.6073, GNorm = 1.3945, lr_0 = 1.0879e-04
Loss = 3.2955e-01, PNorm = 63.6084, GNorm = 1.7421, lr_0 = 1.0871e-04
Loss = 3.2863e-01, PNorm = 63.6090, GNorm = 1.2750, lr_0 = 1.0864e-04
Loss = 3.3310e-01, PNorm = 63.6096, GNorm = 1.9490, lr_0 = 1.0856e-04
Validation mae = 0.111233
Epoch 29
Loss = 3.1982e-01, PNorm = 63.6109, GNorm = 1.2255, lr_0 = 1.0849e-04
Loss = 3.1921e-01, PNorm = 63.6125, GNorm = 1.1268, lr_0 = 1.0841e-04
Loss = 3.5136e-01, PNorm = 63.6131, GNorm = 1.3771, lr_0 = 1.0834e-04
Loss = 3.1838e-01, PNorm = 63.6141, GNorm = 1.3009, lr_0 = 1.0827e-04
Loss = 3.9607e-01, PNorm = 63.6156, GNorm = 1.2237, lr_0 = 1.0819e-04
Loss = 3.2787e-01, PNorm = 63.6159, GNorm = 2.0991, lr_0 = 1.0812e-04
Loss = 3.3458e-01, PNorm = 63.6171, GNorm = 1.5884, lr_0 = 1.0804e-04
Loss = 3.0527e-01, PNorm = 63.6185, GNorm = 1.7928, lr_0 = 1.0797e-04
Loss = 2.9477e-01, PNorm = 63.6191, GNorm = 1.1787, lr_0 = 1.0790e-04
Loss = 3.6725e-01, PNorm = 63.6193, GNorm = 2.1320, lr_0 = 1.0782e-04
Loss = 3.1271e-01, PNorm = 63.6210, GNorm = 1.4029, lr_0 = 1.0775e-04
Loss = 3.3204e-01, PNorm = 63.6219, GNorm = 2.1590, lr_0 = 1.0767e-04
Loss = 3.6159e-01, PNorm = 63.6215, GNorm = 2.2069, lr_0 = 1.0760e-04
Loss = 3.1497e-01, PNorm = 63.6225, GNorm = 1.3905, lr_0 = 1.0753e-04
Loss = 3.4021e-01, PNorm = 63.6243, GNorm = 1.2320, lr_0 = 1.0745e-04
Loss = 3.4789e-01, PNorm = 63.6257, GNorm = 1.4635, lr_0 = 1.0738e-04
Loss = 3.3343e-01, PNorm = 63.6281, GNorm = 1.2138, lr_0 = 1.0731e-04
Loss = 3.6392e-01, PNorm = 63.6284, GNorm = 1.1357, lr_0 = 1.0723e-04
Loss = 3.5168e-01, PNorm = 63.6277, GNorm = 1.4990, lr_0 = 1.0716e-04
Loss = 3.7589e-01, PNorm = 63.6290, GNorm = 1.6601, lr_0 = 1.0709e-04
Loss = 3.4552e-01, PNorm = 63.6307, GNorm = 1.3358, lr_0 = 1.0701e-04
Loss = 3.5605e-01, PNorm = 63.6310, GNorm = 1.7139, lr_0 = 1.0694e-04
Loss = 3.4183e-01, PNorm = 63.6307, GNorm = 1.2333, lr_0 = 1.0687e-04
Loss = 3.1710e-01, PNorm = 63.6302, GNorm = 1.2603, lr_0 = 1.0679e-04
Loss = 3.0365e-01, PNorm = 63.6316, GNorm = 1.3053, lr_0 = 1.0672e-04
Loss = 3.3427e-01, PNorm = 63.6330, GNorm = 2.3470, lr_0 = 1.0665e-04
Loss = 3.5935e-01, PNorm = 63.6340, GNorm = 1.8659, lr_0 = 1.0657e-04
Loss = 3.0289e-01, PNorm = 63.6352, GNorm = 1.6793, lr_0 = 1.0650e-04
Loss = 3.0622e-01, PNorm = 63.6360, GNorm = 1.5462, lr_0 = 1.0643e-04
Loss = 3.5539e-01, PNorm = 63.6371, GNorm = 1.6962, lr_0 = 1.0635e-04
Loss = 3.2238e-01, PNorm = 63.6387, GNorm = 1.4858, lr_0 = 1.0628e-04
Loss = 3.5546e-01, PNorm = 63.6398, GNorm = 1.5815, lr_0 = 1.0621e-04
Loss = 3.5648e-01, PNorm = 63.6413, GNorm = 1.3254, lr_0 = 1.0614e-04
Loss = 3.4355e-01, PNorm = 63.6423, GNorm = 1.5663, lr_0 = 1.0606e-04
Loss = 3.8712e-01, PNorm = 63.6426, GNorm = 1.5202, lr_0 = 1.0599e-04
Loss = 3.2286e-01, PNorm = 63.6424, GNorm = 1.3929, lr_0 = 1.0592e-04
Loss = 3.5867e-01, PNorm = 63.6429, GNorm = 1.6349, lr_0 = 1.0585e-04
Loss = 3.2114e-01, PNorm = 63.6444, GNorm = 1.2827, lr_0 = 1.0577e-04
Loss = 3.5127e-01, PNorm = 63.6464, GNorm = 1.4465, lr_0 = 1.0570e-04
Loss = 3.1095e-01, PNorm = 63.6480, GNorm = 1.7877, lr_0 = 1.0563e-04
Loss = 4.0645e-01, PNorm = 63.6497, GNorm = 1.4818, lr_0 = 1.0556e-04
Loss = 2.9593e-01, PNorm = 63.6513, GNorm = 1.3382, lr_0 = 1.0548e-04
Loss = 3.5125e-01, PNorm = 63.6511, GNorm = 1.6907, lr_0 = 1.0541e-04
Loss = 3.6583e-01, PNorm = 63.6502, GNorm = 1.5770, lr_0 = 1.0534e-04
Loss = 3.4028e-01, PNorm = 63.6503, GNorm = 1.3118, lr_0 = 1.0527e-04
Loss = 3.5953e-01, PNorm = 63.6517, GNorm = 1.6005, lr_0 = 1.0519e-04
Loss = 3.7094e-01, PNorm = 63.6540, GNorm = 1.5179, lr_0 = 1.0512e-04
Loss = 3.7847e-01, PNorm = 63.6542, GNorm = 1.6015, lr_0 = 1.0505e-04
Loss = 3.2838e-01, PNorm = 63.6550, GNorm = 1.2784, lr_0 = 1.0498e-04
Loss = 3.6906e-01, PNorm = 63.6558, GNorm = 1.7508, lr_0 = 1.0491e-04
Loss = 2.9762e-01, PNorm = 63.6578, GNorm = 1.0515, lr_0 = 1.0483e-04
Loss = 3.4674e-01, PNorm = 63.6592, GNorm = 1.8112, lr_0 = 1.0476e-04
Loss = 3.9541e-01, PNorm = 63.6602, GNorm = 0.9665, lr_0 = 1.0469e-04
Loss = 2.9577e-01, PNorm = 63.6621, GNorm = 1.6340, lr_0 = 1.0462e-04
Loss = 3.2696e-01, PNorm = 63.6634, GNorm = 1.2425, lr_0 = 1.0455e-04
Loss = 3.5010e-01, PNorm = 63.6642, GNorm = 1.1441, lr_0 = 1.0448e-04
Loss = 4.2272e-01, PNorm = 63.6641, GNorm = 1.5703, lr_0 = 1.0440e-04
Loss = 3.8477e-01, PNorm = 63.6653, GNorm = 1.5453, lr_0 = 1.0433e-04
Loss = 3.0998e-01, PNorm = 63.6668, GNorm = 1.2456, lr_0 = 1.0426e-04
Loss = 3.4225e-01, PNorm = 63.6677, GNorm = 1.6524, lr_0 = 1.0419e-04
Loss = 3.1437e-01, PNorm = 63.6680, GNorm = 1.2222, lr_0 = 1.0412e-04
Loss = 3.5970e-01, PNorm = 63.6688, GNorm = 1.6629, lr_0 = 1.0405e-04
Loss = 3.4083e-01, PNorm = 63.6687, GNorm = 1.4860, lr_0 = 1.0398e-04
Loss = 3.6523e-01, PNorm = 63.6686, GNorm = 1.9276, lr_0 = 1.0391e-04
Loss = 3.4030e-01, PNorm = 63.6693, GNorm = 1.8875, lr_0 = 1.0383e-04
Loss = 3.3124e-01, PNorm = 63.6684, GNorm = 1.4230, lr_0 = 1.0376e-04
Loss = 3.1963e-01, PNorm = 63.6685, GNorm = 1.3080, lr_0 = 1.0369e-04
Loss = 3.3378e-01, PNorm = 63.6705, GNorm = 1.4209, lr_0 = 1.0362e-04
Loss = 3.1633e-01, PNorm = 63.6724, GNorm = 2.4704, lr_0 = 1.0355e-04
Loss = 3.6341e-01, PNorm = 63.6728, GNorm = 1.3426, lr_0 = 1.0348e-04
Loss = 3.3031e-01, PNorm = 63.6740, GNorm = 1.4519, lr_0 = 1.0341e-04
Loss = 3.1039e-01, PNorm = 63.6751, GNorm = 1.9278, lr_0 = 1.0334e-04
Loss = 3.4120e-01, PNorm = 63.6754, GNorm = 1.3901, lr_0 = 1.0327e-04
Loss = 3.3309e-01, PNorm = 63.6752, GNorm = 1.1973, lr_0 = 1.0320e-04
Loss = 3.8133e-01, PNorm = 63.6771, GNorm = 1.3928, lr_0 = 1.0312e-04
Loss = 3.2759e-01, PNorm = 63.6794, GNorm = 1.7959, lr_0 = 1.0305e-04
Loss = 3.3858e-01, PNorm = 63.6789, GNorm = 1.5158, lr_0 = 1.0298e-04
Loss = 3.8446e-01, PNorm = 63.6796, GNorm = 1.2039, lr_0 = 1.0291e-04
Loss = 3.3048e-01, PNorm = 63.6796, GNorm = 1.5147, lr_0 = 1.0284e-04
Loss = 3.3466e-01, PNorm = 63.6796, GNorm = 1.6748, lr_0 = 1.0277e-04
Loss = 3.0828e-01, PNorm = 63.6808, GNorm = 1.3272, lr_0 = 1.0270e-04
Loss = 3.4038e-01, PNorm = 63.6831, GNorm = 1.5187, lr_0 = 1.0263e-04
Loss = 4.0459e-01, PNorm = 63.6855, GNorm = 1.2959, lr_0 = 1.0256e-04
Loss = 3.0733e-01, PNorm = 63.6870, GNorm = 1.3279, lr_0 = 1.0249e-04
Loss = 3.4056e-01, PNorm = 63.6880, GNorm = 1.2628, lr_0 = 1.0242e-04
Loss = 3.1905e-01, PNorm = 63.6893, GNorm = 1.5409, lr_0 = 1.0235e-04
Loss = 3.9978e-01, PNorm = 63.6896, GNorm = 1.5852, lr_0 = 1.0228e-04
Loss = 3.0823e-01, PNorm = 63.6901, GNorm = 1.3960, lr_0 = 1.0221e-04
Loss = 3.1963e-01, PNorm = 63.6926, GNorm = 1.6061, lr_0 = 1.0214e-04
Loss = 3.5336e-01, PNorm = 63.6932, GNorm = 1.6069, lr_0 = 1.0207e-04
Loss = 3.6740e-01, PNorm = 63.6950, GNorm = 1.6752, lr_0 = 1.0200e-04
Loss = 3.8295e-01, PNorm = 63.6949, GNorm = 1.5240, lr_0 = 1.0193e-04
Loss = 3.7211e-01, PNorm = 63.6947, GNorm = 1.7014, lr_0 = 1.0186e-04
Loss = 3.2687e-01, PNorm = 63.6959, GNorm = 1.5355, lr_0 = 1.0179e-04
Loss = 3.2387e-01, PNorm = 63.6983, GNorm = 1.3419, lr_0 = 1.0172e-04
Loss = 3.2266e-01, PNorm = 63.6989, GNorm = 1.4827, lr_0 = 1.0165e-04
Loss = 3.2289e-01, PNorm = 63.6979, GNorm = 1.3255, lr_0 = 1.0158e-04
Loss = 3.1354e-01, PNorm = 63.6986, GNorm = 1.5709, lr_0 = 1.0151e-04
Loss = 3.5643e-01, PNorm = 63.6991, GNorm = 1.6494, lr_0 = 1.0144e-04
Loss = 2.9877e-01, PNorm = 63.6984, GNorm = 1.0271, lr_0 = 1.0137e-04
Loss = 3.3879e-01, PNorm = 63.6979, GNorm = 1.2931, lr_0 = 1.0130e-04
Loss = 3.3963e-01, PNorm = 63.6979, GNorm = 1.3365, lr_0 = 1.0123e-04
Loss = 3.6351e-01, PNorm = 63.6979, GNorm = 1.5365, lr_0 = 1.0116e-04
Loss = 3.5706e-01, PNorm = 63.6993, GNorm = 2.0193, lr_0 = 1.0110e-04
Loss = 2.8818e-01, PNorm = 63.7007, GNorm = 1.8920, lr_0 = 1.0103e-04
Loss = 4.0106e-01, PNorm = 63.7025, GNorm = 1.4309, lr_0 = 1.0096e-04
Loss = 3.2475e-01, PNorm = 63.7034, GNorm = 1.4604, lr_0 = 1.0089e-04
Loss = 3.4161e-01, PNorm = 63.7032, GNorm = 1.4656, lr_0 = 1.0082e-04
Loss = 3.2784e-01, PNorm = 63.7033, GNorm = 1.4803, lr_0 = 1.0075e-04
Loss = 3.4528e-01, PNorm = 63.7036, GNorm = 1.5202, lr_0 = 1.0068e-04
Loss = 3.0274e-01, PNorm = 63.7046, GNorm = 1.5808, lr_0 = 1.0061e-04
Loss = 3.2631e-01, PNorm = 63.7057, GNorm = 1.3554, lr_0 = 1.0054e-04
Loss = 3.6010e-01, PNorm = 63.7057, GNorm = 2.1158, lr_0 = 1.0047e-04
Loss = 3.0552e-01, PNorm = 63.7077, GNorm = 1.4345, lr_0 = 1.0041e-04
Loss = 3.4485e-01, PNorm = 63.7078, GNorm = 1.6505, lr_0 = 1.0034e-04
Loss = 3.9723e-01, PNorm = 63.7085, GNorm = 1.4694, lr_0 = 1.0027e-04
Loss = 3.6126e-01, PNorm = 63.7103, GNorm = 1.2516, lr_0 = 1.0020e-04
Loss = 3.8583e-01, PNorm = 63.7108, GNorm = 2.6338, lr_0 = 1.0013e-04
Loss = 3.2508e-01, PNorm = 63.7108, GNorm = 1.4132, lr_0 = 1.0006e-04
Loss = 3.4791e-01, PNorm = 63.7105, GNorm = 1.3725, lr_0 = 1.0000e-04
Validation mae = 0.112009
Model 0 best validation mae = 0.110936 on epoch 27
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110406
Ensemble test mae = 0.110406
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0984e+00, PNorm = 38.1706, GNorm = 4.6237, lr_0 = 1.0413e-04
Loss = 9.6521e-01, PNorm = 38.1704, GNorm = 2.3411, lr_0 = 1.0788e-04
Loss = 9.7930e-01, PNorm = 38.1704, GNorm = 2.5508, lr_0 = 1.1163e-04
Loss = 9.7077e-01, PNorm = 38.1705, GNorm = 2.5147, lr_0 = 1.1537e-04
Loss = 9.7703e-01, PNorm = 38.1711, GNorm = 1.9329, lr_0 = 1.1913e-04
Loss = 1.1084e+00, PNorm = 38.1718, GNorm = 3.2248, lr_0 = 1.2287e-04
Loss = 9.3100e-01, PNorm = 38.1725, GNorm = 5.1055, lr_0 = 1.2663e-04
Loss = 8.5743e-01, PNorm = 38.1730, GNorm = 3.8130, lr_0 = 1.3038e-04
Loss = 8.7120e-01, PNorm = 38.1741, GNorm = 2.3978, lr_0 = 1.3413e-04
Loss = 1.0683e+00, PNorm = 38.1758, GNorm = 2.0496, lr_0 = 1.3788e-04
Loss = 8.6731e-01, PNorm = 38.1770, GNorm = 3.3353, lr_0 = 1.4163e-04
Loss = 8.8350e-01, PNorm = 38.1786, GNorm = 5.5413, lr_0 = 1.4537e-04
Loss = 9.0120e-01, PNorm = 38.1806, GNorm = 2.4626, lr_0 = 1.4913e-04
Loss = 9.1342e-01, PNorm = 38.1832, GNorm = 2.7918, lr_0 = 1.5288e-04
Loss = 9.0123e-01, PNorm = 38.1860, GNorm = 3.0975, lr_0 = 1.5662e-04
Loss = 8.5459e-01, PNorm = 38.1887, GNorm = 3.1852, lr_0 = 1.6038e-04
Loss = 9.2931e-01, PNorm = 38.1919, GNorm = 2.4890, lr_0 = 1.6412e-04
Loss = 8.8966e-01, PNorm = 38.1952, GNorm = 2.1997, lr_0 = 1.6788e-04
Loss = 8.5161e-01, PNorm = 38.1980, GNorm = 4.2839, lr_0 = 1.7163e-04
Loss = 8.1291e-01, PNorm = 38.2005, GNorm = 9.4854, lr_0 = 1.7538e-04
Loss = 7.6911e-01, PNorm = 38.2026, GNorm = 3.9589, lr_0 = 1.7913e-04
Loss = 8.2054e-01, PNorm = 38.2060, GNorm = 1.6788, lr_0 = 1.8288e-04
Loss = 8.0887e-01, PNorm = 38.2096, GNorm = 3.8320, lr_0 = 1.8662e-04
Loss = 7.5675e-01, PNorm = 38.2128, GNorm = 2.5764, lr_0 = 1.9038e-04
Loss = 7.4529e-01, PNorm = 38.2164, GNorm = 6.8859, lr_0 = 1.9413e-04
Loss = 7.3689e-01, PNorm = 38.2185, GNorm = 6.0260, lr_0 = 1.9788e-04
Loss = 8.7961e-01, PNorm = 38.2206, GNorm = 3.0898, lr_0 = 2.0163e-04
Loss = 7.2884e-01, PNorm = 38.2250, GNorm = 1.8835, lr_0 = 2.0537e-04
Loss = 7.1929e-01, PNorm = 38.2276, GNorm = 2.4628, lr_0 = 2.0913e-04
Loss = 6.8790e-01, PNorm = 38.2304, GNorm = 2.2681, lr_0 = 2.1288e-04
Loss = 7.8551e-01, PNorm = 38.2329, GNorm = 2.6687, lr_0 = 2.1663e-04
Loss = 7.7034e-01, PNorm = 38.2363, GNorm = 3.2877, lr_0 = 2.2038e-04
Loss = 7.1435e-01, PNorm = 38.2388, GNorm = 11.4736, lr_0 = 2.2412e-04
Loss = 7.6615e-01, PNorm = 38.2407, GNorm = 5.4495, lr_0 = 2.2787e-04
Loss = 6.3598e-01, PNorm = 38.2446, GNorm = 1.7886, lr_0 = 2.3163e-04
Loss = 7.6488e-01, PNorm = 38.2479, GNorm = 4.9656, lr_0 = 2.3538e-04
Loss = 6.8051e-01, PNorm = 38.2507, GNorm = 4.2699, lr_0 = 2.3913e-04
Loss = 6.6975e-01, PNorm = 38.2535, GNorm = 7.0658, lr_0 = 2.4288e-04
Loss = 6.6809e-01, PNorm = 38.2560, GNorm = 2.2648, lr_0 = 2.4662e-04
Loss = 7.1657e-01, PNorm = 38.2574, GNorm = 3.3512, lr_0 = 2.5038e-04
Loss = 7.1430e-01, PNorm = 38.2598, GNorm = 8.7030, lr_0 = 2.5413e-04
Loss = 7.2287e-01, PNorm = 38.2631, GNorm = 1.8404, lr_0 = 2.5788e-04
Loss = 6.6303e-01, PNorm = 38.2654, GNorm = 9.4420, lr_0 = 2.6163e-04
Loss = 6.5508e-01, PNorm = 38.2672, GNorm = 4.7748, lr_0 = 2.6537e-04
Loss = 7.4331e-01, PNorm = 38.2707, GNorm = 3.4987, lr_0 = 2.6912e-04
Loss = 7.1625e-01, PNorm = 38.2738, GNorm = 2.6627, lr_0 = 2.7288e-04
Loss = 6.6747e-01, PNorm = 38.2762, GNorm = 2.7855, lr_0 = 2.7663e-04
Loss = 6.9283e-01, PNorm = 38.2798, GNorm = 5.7677, lr_0 = 2.8038e-04
Loss = 7.0822e-01, PNorm = 38.2833, GNorm = 2.1143, lr_0 = 2.8413e-04
Loss = 7.3255e-01, PNorm = 38.2871, GNorm = 4.0633, lr_0 = 2.8787e-04
Loss = 7.0571e-01, PNorm = 38.2898, GNorm = 8.1139, lr_0 = 2.9163e-04
Loss = 6.9455e-01, PNorm = 38.2938, GNorm = 4.4558, lr_0 = 2.9538e-04
Loss = 7.5328e-01, PNorm = 38.2993, GNorm = 6.9672, lr_0 = 2.9913e-04
Loss = 7.0867e-01, PNorm = 38.3026, GNorm = 7.3877, lr_0 = 3.0288e-04
Loss = 6.7068e-01, PNorm = 38.3091, GNorm = 5.0949, lr_0 = 3.0662e-04
Loss = 6.2930e-01, PNorm = 38.3118, GNorm = 3.1083, lr_0 = 3.1037e-04
Loss = 6.1548e-01, PNorm = 38.3176, GNorm = 3.6798, lr_0 = 3.1413e-04
Loss = 6.5201e-01, PNorm = 38.3185, GNorm = 5.0223, lr_0 = 3.1788e-04
Loss = 6.3643e-01, PNorm = 38.3211, GNorm = 3.7315, lr_0 = 3.2163e-04
Loss = 7.3232e-01, PNorm = 38.3236, GNorm = 8.1505, lr_0 = 3.2538e-04
Loss = 6.1388e-01, PNorm = 38.3279, GNorm = 3.5655, lr_0 = 3.2912e-04
Loss = 6.6969e-01, PNorm = 38.3328, GNorm = 2.2990, lr_0 = 3.3288e-04
Loss = 7.1003e-01, PNorm = 38.3353, GNorm = 6.3643, lr_0 = 3.3663e-04
Loss = 6.8336e-01, PNorm = 38.3394, GNorm = 4.2085, lr_0 = 3.4038e-04
Loss = 6.1869e-01, PNorm = 38.3426, GNorm = 1.6627, lr_0 = 3.4413e-04
Loss = 6.5986e-01, PNorm = 38.3468, GNorm = 3.6082, lr_0 = 3.4787e-04
Loss = 6.3491e-01, PNorm = 38.3505, GNorm = 1.3575, lr_0 = 3.5162e-04
Loss = 6.6242e-01, PNorm = 38.3535, GNorm = 6.3096, lr_0 = 3.5538e-04
Loss = 6.8678e-01, PNorm = 38.3578, GNorm = 2.5949, lr_0 = 3.5913e-04
Loss = 5.9935e-01, PNorm = 38.3641, GNorm = 3.8605, lr_0 = 3.6288e-04
Loss = 6.7254e-01, PNorm = 38.3659, GNorm = 4.0389, lr_0 = 3.6662e-04
Loss = 6.6675e-01, PNorm = 38.3680, GNorm = 2.4690, lr_0 = 3.7037e-04
Loss = 6.6154e-01, PNorm = 38.3747, GNorm = 1.2677, lr_0 = 3.7413e-04
Loss = 6.4970e-01, PNorm = 38.3803, GNorm = 5.2372, lr_0 = 3.7788e-04
Loss = 6.9423e-01, PNorm = 38.3877, GNorm = 2.4273, lr_0 = 3.8163e-04
Loss = 7.7085e-01, PNorm = 38.3926, GNorm = 1.1506, lr_0 = 3.8537e-04
Loss = 6.7960e-01, PNorm = 38.3987, GNorm = 1.5810, lr_0 = 3.8912e-04
Loss = 5.5027e-01, PNorm = 38.4059, GNorm = 2.3549, lr_0 = 3.9287e-04
Loss = 6.4638e-01, PNorm = 38.4104, GNorm = 9.9056, lr_0 = 3.9663e-04
Loss = 6.7522e-01, PNorm = 38.4170, GNorm = 10.9381, lr_0 = 4.0038e-04
Loss = 7.1065e-01, PNorm = 38.4214, GNorm = 4.7223, lr_0 = 4.0413e-04
Loss = 6.0909e-01, PNorm = 38.4272, GNorm = 2.6326, lr_0 = 4.0787e-04
Loss = 7.5814e-01, PNorm = 38.4312, GNorm = 2.2075, lr_0 = 4.1162e-04
Loss = 5.6420e-01, PNorm = 38.4368, GNorm = 2.5931, lr_0 = 4.1537e-04
Loss = 5.8351e-01, PNorm = 38.4425, GNorm = 2.2719, lr_0 = 4.1913e-04
Loss = 6.2738e-01, PNorm = 38.4484, GNorm = 4.1880, lr_0 = 4.2288e-04
Loss = 5.8704e-01, PNorm = 38.4585, GNorm = 3.5202, lr_0 = 4.2662e-04
Loss = 7.3944e-01, PNorm = 38.4622, GNorm = 3.4443, lr_0 = 4.3037e-04
Loss = 6.2586e-01, PNorm = 38.4628, GNorm = 3.7561, lr_0 = 4.3412e-04
Loss = 7.4434e-01, PNorm = 38.4719, GNorm = 4.3401, lr_0 = 4.3788e-04
Loss = 6.1226e-01, PNorm = 38.4814, GNorm = 9.9147, lr_0 = 4.4163e-04
Loss = 6.1721e-01, PNorm = 38.4876, GNorm = 5.3816, lr_0 = 4.4538e-04
Loss = 6.3168e-01, PNorm = 38.4915, GNorm = 1.7111, lr_0 = 4.4912e-04
Loss = 7.0597e-01, PNorm = 38.5005, GNorm = 4.8748, lr_0 = 4.5287e-04
Loss = 7.0088e-01, PNorm = 38.5082, GNorm = 1.4462, lr_0 = 4.5662e-04
Loss = 6.1062e-01, PNorm = 38.5157, GNorm = 2.5407, lr_0 = 4.6038e-04
Loss = 6.7620e-01, PNorm = 38.5200, GNorm = 1.0612, lr_0 = 4.6413e-04
Loss = 6.4153e-01, PNorm = 38.5262, GNorm = 1.0166, lr_0 = 4.6787e-04
Loss = 6.9714e-01, PNorm = 38.5374, GNorm = 1.9260, lr_0 = 4.7162e-04
Loss = 6.6147e-01, PNorm = 38.5388, GNorm = 1.3988, lr_0 = 4.7537e-04
Loss = 6.5040e-01, PNorm = 38.5463, GNorm = 5.9781, lr_0 = 4.7913e-04
Loss = 5.2565e-01, PNorm = 38.5536, GNorm = 1.7713, lr_0 = 4.8288e-04
Loss = 5.8765e-01, PNorm = 38.5602, GNorm = 2.0333, lr_0 = 4.8663e-04
Loss = 6.8680e-01, PNorm = 38.5667, GNorm = 5.2634, lr_0 = 4.9038e-04
Loss = 6.5379e-01, PNorm = 38.5735, GNorm = 2.4963, lr_0 = 4.9412e-04
Loss = 6.3623e-01, PNorm = 38.5829, GNorm = 2.0462, lr_0 = 4.9788e-04
Loss = 6.6390e-01, PNorm = 38.5948, GNorm = 2.5187, lr_0 = 5.0163e-04
Loss = 5.8151e-01, PNorm = 38.6054, GNorm = 4.4620, lr_0 = 5.0538e-04
Loss = 6.3587e-01, PNorm = 38.6117, GNorm = 1.9875, lr_0 = 5.0913e-04
Loss = 5.3136e-01, PNorm = 38.6166, GNorm = 3.8760, lr_0 = 5.1287e-04
Loss = 6.6668e-01, PNorm = 38.6255, GNorm = 1.8843, lr_0 = 5.1663e-04
Loss = 6.3676e-01, PNorm = 38.6350, GNorm = 1.4802, lr_0 = 5.2038e-04
Loss = 6.5029e-01, PNorm = 38.6404, GNorm = 3.7232, lr_0 = 5.2413e-04
Loss = 6.1600e-01, PNorm = 38.6506, GNorm = 2.0736, lr_0 = 5.2788e-04
Loss = 6.6380e-01, PNorm = 38.6618, GNorm = 4.3526, lr_0 = 5.3162e-04
Loss = 6.0430e-01, PNorm = 38.6749, GNorm = 2.5886, lr_0 = 5.3538e-04
Loss = 6.9379e-01, PNorm = 38.6862, GNorm = 5.7512, lr_0 = 5.3912e-04
Loss = 6.9429e-01, PNorm = 38.6937, GNorm = 2.3736, lr_0 = 5.4288e-04
Loss = 5.9710e-01, PNorm = 38.7013, GNorm = 2.0161, lr_0 = 5.4663e-04
Loss = 6.4808e-01, PNorm = 38.7097, GNorm = 2.2369, lr_0 = 5.5038e-04
Validation mae = 0.138343
Epoch 1
Loss = 5.7180e-01, PNorm = 38.7170, GNorm = 2.1290, lr_0 = 5.5413e-04
Loss = 6.5179e-01, PNorm = 38.7273, GNorm = 3.7683, lr_0 = 5.5787e-04
Loss = 5.9523e-01, PNorm = 38.7330, GNorm = 6.5191, lr_0 = 5.6163e-04
Loss = 5.5497e-01, PNorm = 38.7457, GNorm = 3.0916, lr_0 = 5.6538e-04
Loss = 6.9108e-01, PNorm = 38.7597, GNorm = 2.9785, lr_0 = 5.6913e-04
Loss = 8.2914e-01, PNorm = 38.7726, GNorm = 2.7988, lr_0 = 5.7288e-04
Loss = 6.6607e-01, PNorm = 38.7851, GNorm = 1.4085, lr_0 = 5.7662e-04
Loss = 6.2507e-01, PNorm = 38.8008, GNorm = 3.0632, lr_0 = 5.8038e-04
Loss = 5.3251e-01, PNorm = 38.8108, GNorm = 3.4049, lr_0 = 5.8413e-04
Loss = 6.3557e-01, PNorm = 38.8231, GNorm = 4.4430, lr_0 = 5.8788e-04
Loss = 6.6318e-01, PNorm = 38.8348, GNorm = 0.8944, lr_0 = 5.9163e-04
Loss = 6.2742e-01, PNorm = 38.8455, GNorm = 4.4069, lr_0 = 5.9538e-04
Loss = 5.7410e-01, PNorm = 38.8561, GNorm = 2.0916, lr_0 = 5.9913e-04
Loss = 5.7785e-01, PNorm = 38.8603, GNorm = 8.5617, lr_0 = 6.0288e-04
Loss = 6.3831e-01, PNorm = 38.8719, GNorm = 4.4638, lr_0 = 6.0663e-04
Loss = 6.1218e-01, PNorm = 38.8878, GNorm = 1.9052, lr_0 = 6.1038e-04
Loss = 6.2097e-01, PNorm = 38.9018, GNorm = 6.7996, lr_0 = 6.1413e-04
Loss = 6.4175e-01, PNorm = 38.9111, GNorm = 4.8905, lr_0 = 6.1788e-04
Loss = 6.5490e-01, PNorm = 38.9248, GNorm = 3.0458, lr_0 = 6.2163e-04
Loss = 5.8173e-01, PNorm = 38.9357, GNorm = 3.9958, lr_0 = 6.2538e-04
Loss = 6.4254e-01, PNorm = 38.9435, GNorm = 4.1041, lr_0 = 6.2913e-04
Loss = 6.5701e-01, PNorm = 38.9513, GNorm = 1.5392, lr_0 = 6.3288e-04
Loss = 6.1562e-01, PNorm = 38.9584, GNorm = 5.0187, lr_0 = 6.3663e-04
Loss = 5.5947e-01, PNorm = 38.9699, GNorm = 1.5380, lr_0 = 6.4038e-04
Loss = 5.5508e-01, PNorm = 38.9916, GNorm = 2.1082, lr_0 = 6.4413e-04
Loss = 6.2688e-01, PNorm = 39.0036, GNorm = 1.3020, lr_0 = 6.4788e-04
Loss = 5.5497e-01, PNorm = 39.0182, GNorm = 1.7665, lr_0 = 6.5163e-04
Loss = 5.7119e-01, PNorm = 39.0258, GNorm = 2.2878, lr_0 = 6.5538e-04
Loss = 6.0766e-01, PNorm = 39.0430, GNorm = 2.4972, lr_0 = 6.5913e-04
Loss = 6.6352e-01, PNorm = 39.0575, GNorm = 3.6311, lr_0 = 6.6288e-04
Loss = 6.6579e-01, PNorm = 39.0651, GNorm = 5.4669, lr_0 = 6.6663e-04
Loss = 7.2098e-01, PNorm = 39.0816, GNorm = 1.2694, lr_0 = 6.7038e-04
Loss = 6.0143e-01, PNorm = 39.1014, GNorm = 5.0976, lr_0 = 6.7413e-04
Loss = 7.1275e-01, PNorm = 39.1098, GNorm = 1.2367, lr_0 = 6.7788e-04
Loss = 6.2048e-01, PNorm = 39.1265, GNorm = 5.3013, lr_0 = 6.8163e-04
Loss = 7.0501e-01, PNorm = 39.1355, GNorm = 2.4866, lr_0 = 6.8538e-04
Loss = 5.6599e-01, PNorm = 39.1425, GNorm = 1.0681, lr_0 = 6.8913e-04
Loss = 5.8454e-01, PNorm = 39.1598, GNorm = 2.8284, lr_0 = 6.9288e-04
Loss = 6.9986e-01, PNorm = 39.1698, GNorm = 3.0270, lr_0 = 6.9663e-04
Loss = 5.8988e-01, PNorm = 39.1837, GNorm = 8.1990, lr_0 = 7.0038e-04
Loss = 7.1800e-01, PNorm = 39.1879, GNorm = 2.3573, lr_0 = 7.0413e-04
Loss = 6.0122e-01, PNorm = 39.2061, GNorm = 1.0582, lr_0 = 7.0788e-04
Loss = 5.2660e-01, PNorm = 39.2206, GNorm = 3.2029, lr_0 = 7.1163e-04
Loss = 5.6621e-01, PNorm = 39.2297, GNorm = 4.0748, lr_0 = 7.1538e-04
Loss = 5.9384e-01, PNorm = 39.2371, GNorm = 1.4142, lr_0 = 7.1913e-04
Loss = 4.9403e-01, PNorm = 39.2487, GNorm = 1.2410, lr_0 = 7.2288e-04
Loss = 4.9137e-01, PNorm = 39.2597, GNorm = 2.4268, lr_0 = 7.2663e-04
Loss = 5.5665e-01, PNorm = 39.2660, GNorm = 2.1551, lr_0 = 7.3038e-04
Loss = 5.7083e-01, PNorm = 39.2758, GNorm = 4.3693, lr_0 = 7.3413e-04
Loss = 5.1189e-01, PNorm = 39.2850, GNorm = 7.2739, lr_0 = 7.3788e-04
Loss = 5.5225e-01, PNorm = 39.2945, GNorm = 1.0736, lr_0 = 7.4163e-04
Loss = 5.8953e-01, PNorm = 39.3066, GNorm = 8.0427, lr_0 = 7.4538e-04
Loss = 5.9732e-01, PNorm = 39.3157, GNorm = 1.5324, lr_0 = 7.4913e-04
Loss = 5.5299e-01, PNorm = 39.3284, GNorm = 1.9686, lr_0 = 7.5288e-04
Loss = 5.6340e-01, PNorm = 39.3390, GNorm = 2.1794, lr_0 = 7.5663e-04
Loss = 5.7493e-01, PNorm = 39.3490, GNorm = 5.2392, lr_0 = 7.6038e-04
Loss = 5.6660e-01, PNorm = 39.3621, GNorm = 2.1772, lr_0 = 7.6413e-04
Loss = 6.5641e-01, PNorm = 39.3750, GNorm = 1.5858, lr_0 = 7.6788e-04
Loss = 6.0311e-01, PNorm = 39.3929, GNorm = 4.2448, lr_0 = 7.7163e-04
Loss = 5.2253e-01, PNorm = 39.4116, GNorm = 2.7530, lr_0 = 7.7538e-04
Loss = 5.8184e-01, PNorm = 39.4162, GNorm = 1.3461, lr_0 = 7.7913e-04
Loss = 5.2657e-01, PNorm = 39.4349, GNorm = 3.5292, lr_0 = 7.8288e-04
Loss = 5.4680e-01, PNorm = 39.4521, GNorm = 0.8192, lr_0 = 7.8663e-04
Loss = 5.8359e-01, PNorm = 39.4658, GNorm = 2.6866, lr_0 = 7.9038e-04
Loss = 5.4004e-01, PNorm = 39.4795, GNorm = 3.7225, lr_0 = 7.9413e-04
Loss = 6.5832e-01, PNorm = 39.4941, GNorm = 1.0027, lr_0 = 7.9788e-04
Loss = 7.4546e-01, PNorm = 39.5129, GNorm = 1.5371, lr_0 = 8.0163e-04
Loss = 7.0160e-01, PNorm = 39.5411, GNorm = 2.4661, lr_0 = 8.0538e-04
Loss = 6.4532e-01, PNorm = 39.5746, GNorm = 3.3390, lr_0 = 8.0913e-04
Loss = 6.0510e-01, PNorm = 39.5862, GNorm = 1.0190, lr_0 = 8.1288e-04
Loss = 5.6676e-01, PNorm = 39.6034, GNorm = 2.8275, lr_0 = 8.1663e-04
Loss = 5.7924e-01, PNorm = 39.6100, GNorm = 1.1586, lr_0 = 8.2038e-04
Loss = 5.5202e-01, PNorm = 39.6268, GNorm = 6.1623, lr_0 = 8.2413e-04
Loss = 5.5184e-01, PNorm = 39.6404, GNorm = 2.4374, lr_0 = 8.2788e-04
Loss = 5.6615e-01, PNorm = 39.6555, GNorm = 1.0646, lr_0 = 8.3163e-04
Loss = 6.4189e-01, PNorm = 39.6727, GNorm = 1.4570, lr_0 = 8.3538e-04
Loss = 5.6707e-01, PNorm = 39.6830, GNorm = 5.8978, lr_0 = 8.3913e-04
Loss = 6.7462e-01, PNorm = 39.7017, GNorm = 2.9621, lr_0 = 8.4288e-04
Loss = 5.5715e-01, PNorm = 39.7275, GNorm = 1.0517, lr_0 = 8.4663e-04
Loss = 4.9105e-01, PNorm = 39.7464, GNorm = 2.7104, lr_0 = 8.5038e-04
Loss = 6.6263e-01, PNorm = 39.7663, GNorm = 2.7585, lr_0 = 8.5413e-04
Loss = 5.7836e-01, PNorm = 39.7806, GNorm = 1.8115, lr_0 = 8.5788e-04
Loss = 5.3710e-01, PNorm = 39.8016, GNorm = 2.2105, lr_0 = 8.6163e-04
Loss = 6.0192e-01, PNorm = 39.8124, GNorm = 3.8938, lr_0 = 8.6538e-04
Loss = 5.3217e-01, PNorm = 39.8385, GNorm = 3.0147, lr_0 = 8.6913e-04
Loss = 5.7924e-01, PNorm = 39.8516, GNorm = 1.8257, lr_0 = 8.7288e-04
Loss = 5.9832e-01, PNorm = 39.8706, GNorm = 1.5642, lr_0 = 8.7663e-04
Loss = 4.9874e-01, PNorm = 39.8907, GNorm = 2.9810, lr_0 = 8.8038e-04
Loss = 6.6043e-01, PNorm = 39.9133, GNorm = 2.1394, lr_0 = 8.8413e-04
Loss = 5.5259e-01, PNorm = 39.9292, GNorm = 4.5883, lr_0 = 8.8788e-04
Loss = 6.8638e-01, PNorm = 39.9419, GNorm = 1.3859, lr_0 = 8.9163e-04
Loss = 6.0394e-01, PNorm = 39.9628, GNorm = 0.9032, lr_0 = 8.9538e-04
Loss = 6.0526e-01, PNorm = 39.9830, GNorm = 2.2869, lr_0 = 8.9913e-04
Loss = 6.3421e-01, PNorm = 40.0024, GNorm = 2.5349, lr_0 = 9.0288e-04
Loss = 5.8130e-01, PNorm = 40.0228, GNorm = 1.3143, lr_0 = 9.0663e-04
Loss = 5.8068e-01, PNorm = 40.0479, GNorm = 1.5377, lr_0 = 9.1038e-04
Loss = 5.5978e-01, PNorm = 40.0657, GNorm = 1.5697, lr_0 = 9.1413e-04
Loss = 4.9226e-01, PNorm = 40.0888, GNorm = 3.2103, lr_0 = 9.1788e-04
Loss = 6.3096e-01, PNorm = 40.1012, GNorm = 1.1318, lr_0 = 9.2163e-04
Loss = 6.0056e-01, PNorm = 40.1228, GNorm = 7.1759, lr_0 = 9.2538e-04
Loss = 6.1615e-01, PNorm = 40.1346, GNorm = 1.4926, lr_0 = 9.2913e-04
Loss = 5.8450e-01, PNorm = 40.1599, GNorm = 4.4279, lr_0 = 9.3288e-04
Loss = 6.2472e-01, PNorm = 40.1715, GNorm = 1.5313, lr_0 = 9.3663e-04
Loss = 5.3688e-01, PNorm = 40.1886, GNorm = 1.1496, lr_0 = 9.4038e-04
Loss = 6.5023e-01, PNorm = 40.2097, GNorm = 3.6120, lr_0 = 9.4413e-04
Loss = 6.1888e-01, PNorm = 40.2341, GNorm = 2.5262, lr_0 = 9.4788e-04
Loss = 5.9617e-01, PNorm = 40.2566, GNorm = 3.1124, lr_0 = 9.5163e-04
Loss = 5.5895e-01, PNorm = 40.2768, GNorm = 1.5420, lr_0 = 9.5538e-04
Loss = 5.6769e-01, PNorm = 40.3041, GNorm = 5.0220, lr_0 = 9.5913e-04
Loss = 5.3314e-01, PNorm = 40.3217, GNorm = 1.2321, lr_0 = 9.6288e-04
Loss = 6.4354e-01, PNorm = 40.3379, GNorm = 6.6083, lr_0 = 9.6663e-04
Loss = 5.8022e-01, PNorm = 40.3527, GNorm = 2.6834, lr_0 = 9.7038e-04
Loss = 4.9332e-01, PNorm = 40.3680, GNorm = 0.9003, lr_0 = 9.7413e-04
Loss = 5.0708e-01, PNorm = 40.3943, GNorm = 3.0871, lr_0 = 9.7788e-04
Loss = 5.1470e-01, PNorm = 40.4127, GNorm = 2.5622, lr_0 = 9.8163e-04
Loss = 5.8825e-01, PNorm = 40.4407, GNorm = 1.8589, lr_0 = 9.8537e-04
Loss = 5.4945e-01, PNorm = 40.4565, GNorm = 3.1547, lr_0 = 9.8912e-04
Loss = 5.7687e-01, PNorm = 40.4805, GNorm = 1.5034, lr_0 = 9.9288e-04
Loss = 5.9534e-01, PNorm = 40.5092, GNorm = 1.2809, lr_0 = 9.9663e-04
Loss = 5.5868e-01, PNorm = 40.5357, GNorm = 1.3393, lr_0 = 9.9993e-04
Validation mae = 0.138287
Epoch 2
Loss = 5.4163e-01, PNorm = 40.5536, GNorm = 2.2375, lr_0 = 9.9925e-04
Loss = 5.0316e-01, PNorm = 40.5740, GNorm = 1.1670, lr_0 = 9.9856e-04
Loss = 4.6177e-01, PNorm = 40.5969, GNorm = 1.1188, lr_0 = 9.9788e-04
Loss = 5.4260e-01, PNorm = 40.6156, GNorm = 2.9833, lr_0 = 9.9719e-04
Loss = 5.8245e-01, PNorm = 40.6357, GNorm = 4.4831, lr_0 = 9.9651e-04
Loss = 5.5729e-01, PNorm = 40.6541, GNorm = 1.0788, lr_0 = 9.9583e-04
Loss = 7.1387e-01, PNorm = 40.6862, GNorm = 4.9138, lr_0 = 9.9515e-04
Loss = 5.1969e-01, PNorm = 40.7092, GNorm = 1.2746, lr_0 = 9.9446e-04
Loss = 5.4360e-01, PNorm = 40.7315, GNorm = 2.2665, lr_0 = 9.9378e-04
Loss = 5.2891e-01, PNorm = 40.7652, GNorm = 1.0172, lr_0 = 9.9310e-04
Loss = 5.4343e-01, PNorm = 40.7682, GNorm = 1.7026, lr_0 = 9.9242e-04
Loss = 5.6713e-01, PNorm = 40.7779, GNorm = 1.3614, lr_0 = 9.9174e-04
Loss = 5.7279e-01, PNorm = 40.7964, GNorm = 1.7382, lr_0 = 9.9106e-04
Loss = 5.1729e-01, PNorm = 40.8263, GNorm = 1.8316, lr_0 = 9.9038e-04
Loss = 5.8263e-01, PNorm = 40.8563, GNorm = 1.2714, lr_0 = 9.8971e-04
Loss = 5.6780e-01, PNorm = 40.8728, GNorm = 1.8953, lr_0 = 9.8903e-04
Loss = 5.7115e-01, PNorm = 40.8943, GNorm = 1.4598, lr_0 = 9.8835e-04
Loss = 6.0231e-01, PNorm = 40.9184, GNorm = 2.6909, lr_0 = 9.8767e-04
Loss = 5.4346e-01, PNorm = 40.9385, GNorm = 1.1604, lr_0 = 9.8700e-04
Loss = 5.3529e-01, PNorm = 40.9542, GNorm = 1.5473, lr_0 = 9.8632e-04
Loss = 4.6802e-01, PNorm = 40.9797, GNorm = 1.3729, lr_0 = 9.8564e-04
Loss = 6.1570e-01, PNorm = 41.0059, GNorm = 2.8823, lr_0 = 9.8497e-04
Loss = 5.5476e-01, PNorm = 41.0282, GNorm = 2.3269, lr_0 = 9.8429e-04
Loss = 5.9437e-01, PNorm = 41.0537, GNorm = 1.2618, lr_0 = 9.8362e-04
Loss = 5.7907e-01, PNorm = 41.0690, GNorm = 3.2516, lr_0 = 9.8295e-04
Loss = 4.6312e-01, PNorm = 41.0942, GNorm = 1.6413, lr_0 = 9.8227e-04
Loss = 5.8548e-01, PNorm = 41.1258, GNorm = 1.9571, lr_0 = 9.8160e-04
Loss = 5.6747e-01, PNorm = 41.1443, GNorm = 1.3583, lr_0 = 9.8093e-04
Loss = 5.1467e-01, PNorm = 41.1654, GNorm = 1.6056, lr_0 = 9.8026e-04
Loss = 5.3786e-01, PNorm = 41.1899, GNorm = 3.6906, lr_0 = 9.7958e-04
Loss = 5.4048e-01, PNorm = 41.2141, GNorm = 3.8382, lr_0 = 9.7891e-04
Loss = 5.1978e-01, PNorm = 41.2383, GNorm = 1.6318, lr_0 = 9.7824e-04
Loss = 5.2536e-01, PNorm = 41.2611, GNorm = 1.7342, lr_0 = 9.7757e-04
Loss = 5.6246e-01, PNorm = 41.2792, GNorm = 1.4180, lr_0 = 9.7690e-04
Loss = 5.1897e-01, PNorm = 41.2932, GNorm = 1.6268, lr_0 = 9.7623e-04
Loss = 5.3554e-01, PNorm = 41.3196, GNorm = 1.1627, lr_0 = 9.7556e-04
Loss = 4.8188e-01, PNorm = 41.3299, GNorm = 1.2101, lr_0 = 9.7490e-04
Loss = 5.4841e-01, PNorm = 41.3454, GNorm = 0.9016, lr_0 = 9.7423e-04
Loss = 4.9270e-01, PNorm = 41.3557, GNorm = 1.6708, lr_0 = 9.7356e-04
Loss = 5.6295e-01, PNorm = 41.3768, GNorm = 1.8949, lr_0 = 9.7289e-04
Loss = 5.1543e-01, PNorm = 41.4031, GNorm = 1.4711, lr_0 = 9.7223e-04
Loss = 6.7854e-01, PNorm = 41.4250, GNorm = 1.0786, lr_0 = 9.7156e-04
Loss = 5.6321e-01, PNorm = 41.4427, GNorm = 1.2703, lr_0 = 9.7090e-04
Loss = 5.3495e-01, PNorm = 41.4707, GNorm = 1.2491, lr_0 = 9.7023e-04
Loss = 5.7175e-01, PNorm = 41.4967, GNorm = 1.4223, lr_0 = 9.6957e-04
Loss = 5.1723e-01, PNorm = 41.5154, GNorm = 0.9619, lr_0 = 9.6890e-04
Loss = 5.5302e-01, PNorm = 41.5338, GNorm = 3.3398, lr_0 = 9.6824e-04
Loss = 5.4121e-01, PNorm = 41.5530, GNorm = 5.4204, lr_0 = 9.6757e-04
Loss = 5.6303e-01, PNorm = 41.5771, GNorm = 2.0778, lr_0 = 9.6691e-04
Loss = 5.6914e-01, PNorm = 41.5895, GNorm = 1.5301, lr_0 = 9.6625e-04
Loss = 5.1161e-01, PNorm = 41.6102, GNorm = 1.5192, lr_0 = 9.6559e-04
Loss = 5.7330e-01, PNorm = 41.6351, GNorm = 1.4872, lr_0 = 9.6493e-04
Loss = 5.8121e-01, PNorm = 41.6453, GNorm = 1.7429, lr_0 = 9.6427e-04
Loss = 5.1541e-01, PNorm = 41.6626, GNorm = 2.6342, lr_0 = 9.6360e-04
Loss = 5.2910e-01, PNorm = 41.6822, GNorm = 2.1685, lr_0 = 9.6294e-04
Loss = 5.3046e-01, PNorm = 41.7004, GNorm = 1.3756, lr_0 = 9.6228e-04
Loss = 6.0943e-01, PNorm = 41.7147, GNorm = 1.8924, lr_0 = 9.6163e-04
Loss = 5.8421e-01, PNorm = 41.7330, GNorm = 3.2497, lr_0 = 9.6097e-04
Loss = 6.1306e-01, PNorm = 41.7648, GNorm = 3.4598, lr_0 = 9.6031e-04
Loss = 5.7404e-01, PNorm = 41.7811, GNorm = 4.3893, lr_0 = 9.5965e-04
Loss = 4.5017e-01, PNorm = 41.7879, GNorm = 2.6904, lr_0 = 9.5899e-04
Loss = 5.6962e-01, PNorm = 41.8051, GNorm = 1.1133, lr_0 = 9.5834e-04
Loss = 5.3036e-01, PNorm = 41.8328, GNorm = 1.5924, lr_0 = 9.5768e-04
Loss = 5.1971e-01, PNorm = 41.8524, GNorm = 1.9701, lr_0 = 9.5702e-04
Loss = 4.5238e-01, PNorm = 41.8646, GNorm = 2.3512, lr_0 = 9.5637e-04
Loss = 5.6177e-01, PNorm = 41.8865, GNorm = 1.7416, lr_0 = 9.5571e-04
Loss = 5.3319e-01, PNorm = 41.9091, GNorm = 1.7093, lr_0 = 9.5506e-04
Loss = 5.2519e-01, PNorm = 41.9338, GNorm = 3.1379, lr_0 = 9.5440e-04
Loss = 5.0768e-01, PNorm = 41.9432, GNorm = 1.7506, lr_0 = 9.5375e-04
Loss = 4.9184e-01, PNorm = 41.9545, GNorm = 1.8839, lr_0 = 9.5310e-04
Loss = 5.2838e-01, PNorm = 41.9781, GNorm = 1.3375, lr_0 = 9.5244e-04
Loss = 5.8090e-01, PNorm = 41.9920, GNorm = 3.2048, lr_0 = 9.5179e-04
Loss = 5.3844e-01, PNorm = 42.0081, GNorm = 1.9686, lr_0 = 9.5114e-04
Loss = 5.7542e-01, PNorm = 42.0464, GNorm = 2.1050, lr_0 = 9.5049e-04
Loss = 6.1595e-01, PNorm = 42.0755, GNorm = 1.9472, lr_0 = 9.4984e-04
Loss = 5.3890e-01, PNorm = 42.0955, GNorm = 3.3418, lr_0 = 9.4919e-04
Loss = 6.0003e-01, PNorm = 42.1176, GNorm = 2.2764, lr_0 = 9.4854e-04
Loss = 5.0957e-01, PNorm = 42.1400, GNorm = 1.1735, lr_0 = 9.4789e-04
Loss = 4.6260e-01, PNorm = 42.1669, GNorm = 0.7965, lr_0 = 9.4724e-04
Loss = 5.7777e-01, PNorm = 42.1787, GNorm = 2.1443, lr_0 = 9.4659e-04
Loss = 5.1978e-01, PNorm = 42.1914, GNorm = 1.3729, lr_0 = 9.4594e-04
Loss = 4.6471e-01, PNorm = 42.2107, GNorm = 0.9801, lr_0 = 9.4529e-04
Loss = 5.8053e-01, PNorm = 42.2280, GNorm = 1.7156, lr_0 = 9.4464e-04
Loss = 5.6764e-01, PNorm = 42.2427, GNorm = 1.5417, lr_0 = 9.4400e-04
Loss = 5.8420e-01, PNorm = 42.2629, GNorm = 3.2081, lr_0 = 9.4335e-04
Loss = 5.3952e-01, PNorm = 42.2932, GNorm = 5.3532, lr_0 = 9.4270e-04
Loss = 5.1049e-01, PNorm = 42.3199, GNorm = 1.4084, lr_0 = 9.4206e-04
Loss = 4.8282e-01, PNorm = 42.3409, GNorm = 1.3170, lr_0 = 9.4141e-04
Loss = 4.8975e-01, PNorm = 42.3613, GNorm = 1.7212, lr_0 = 9.4077e-04
Loss = 5.6271e-01, PNorm = 42.3811, GNorm = 3.2717, lr_0 = 9.4012e-04
Loss = 5.0284e-01, PNorm = 42.3916, GNorm = 2.0809, lr_0 = 9.3948e-04
Loss = 5.2030e-01, PNorm = 42.4089, GNorm = 2.7614, lr_0 = 9.3884e-04
Loss = 5.5782e-01, PNorm = 42.4274, GNorm = 2.5023, lr_0 = 9.3819e-04
Loss = 5.4103e-01, PNorm = 42.4546, GNorm = 1.2725, lr_0 = 9.3755e-04
Loss = 5.8036e-01, PNorm = 42.4815, GNorm = 3.8659, lr_0 = 9.3691e-04
Loss = 5.1097e-01, PNorm = 42.4926, GNorm = 1.8701, lr_0 = 9.3627e-04
Loss = 5.0698e-01, PNorm = 42.5029, GNorm = 1.3806, lr_0 = 9.3562e-04
Loss = 5.0869e-01, PNorm = 42.5174, GNorm = 2.5761, lr_0 = 9.3498e-04
Loss = 4.8783e-01, PNorm = 42.5399, GNorm = 1.8315, lr_0 = 9.3434e-04
Loss = 5.1811e-01, PNorm = 42.5571, GNorm = 1.3729, lr_0 = 9.3370e-04
Loss = 4.8758e-01, PNorm = 42.5791, GNorm = 1.3784, lr_0 = 9.3306e-04
Loss = 5.3543e-01, PNorm = 42.6023, GNorm = 0.8820, lr_0 = 9.3242e-04
Loss = 4.7206e-01, PNorm = 42.6274, GNorm = 1.4398, lr_0 = 9.3178e-04
Loss = 4.6851e-01, PNorm = 42.6423, GNorm = 2.0654, lr_0 = 9.3115e-04
Loss = 4.4429e-01, PNorm = 42.6564, GNorm = 2.2047, lr_0 = 9.3051e-04
Loss = 5.1527e-01, PNorm = 42.6663, GNorm = 2.2530, lr_0 = 9.2987e-04
Loss = 5.7084e-01, PNorm = 42.6697, GNorm = 1.7930, lr_0 = 9.2923e-04
Loss = 4.8714e-01, PNorm = 42.6843, GNorm = 1.7542, lr_0 = 9.2860e-04
Loss = 4.9068e-01, PNorm = 42.7081, GNorm = 1.1862, lr_0 = 9.2796e-04
Loss = 5.8621e-01, PNorm = 42.7244, GNorm = 1.8819, lr_0 = 9.2733e-04
Loss = 4.9334e-01, PNorm = 42.7414, GNorm = 1.2067, lr_0 = 9.2669e-04
Loss = 5.1664e-01, PNorm = 42.7625, GNorm = 1.3362, lr_0 = 9.2606e-04
Loss = 6.1574e-01, PNorm = 42.7857, GNorm = 3.6570, lr_0 = 9.2542e-04
Loss = 5.0941e-01, PNorm = 42.8118, GNorm = 1.1407, lr_0 = 9.2479e-04
Loss = 5.7829e-01, PNorm = 42.8335, GNorm = 0.9382, lr_0 = 9.2415e-04
Loss = 5.5019e-01, PNorm = 42.8601, GNorm = 1.5235, lr_0 = 9.2352e-04
Loss = 4.8584e-01, PNorm = 42.8712, GNorm = 1.2780, lr_0 = 9.2289e-04
Loss = 5.0877e-01, PNorm = 42.8886, GNorm = 2.6072, lr_0 = 9.2226e-04
Loss = 5.8540e-01, PNorm = 42.9101, GNorm = 1.3094, lr_0 = 9.2162e-04
Loss = 5.2394e-01, PNorm = 42.9317, GNorm = 1.3239, lr_0 = 9.2099e-04
Validation mae = 0.126377
Epoch 3
Loss = 4.7536e-01, PNorm = 42.9457, GNorm = 3.2685, lr_0 = 9.2036e-04
Loss = 4.7945e-01, PNorm = 42.9585, GNorm = 1.0175, lr_0 = 9.1973e-04
Loss = 5.8215e-01, PNorm = 42.9885, GNorm = 1.4978, lr_0 = 9.1910e-04
Loss = 5.1790e-01, PNorm = 43.0051, GNorm = 1.1131, lr_0 = 9.1847e-04
Loss = 5.6274e-01, PNorm = 43.0250, GNorm = 3.0840, lr_0 = 9.1784e-04
Loss = 5.4212e-01, PNorm = 43.0530, GNorm = 2.1637, lr_0 = 9.1721e-04
Loss = 5.5058e-01, PNorm = 43.0760, GNorm = 2.4533, lr_0 = 9.1658e-04
Loss = 5.6211e-01, PNorm = 43.0980, GNorm = 1.7281, lr_0 = 9.1596e-04
Loss = 5.4685e-01, PNorm = 43.1226, GNorm = 1.0394, lr_0 = 9.1533e-04
Loss = 4.7791e-01, PNorm = 43.1466, GNorm = 3.3364, lr_0 = 9.1470e-04
Loss = 4.9474e-01, PNorm = 43.1537, GNorm = 1.4843, lr_0 = 9.1408e-04
Loss = 4.9005e-01, PNorm = 43.1646, GNorm = 1.8300, lr_0 = 9.1345e-04
Loss = 4.9862e-01, PNorm = 43.1953, GNorm = 1.3115, lr_0 = 9.1282e-04
Loss = 4.7007e-01, PNorm = 43.2224, GNorm = 1.3425, lr_0 = 9.1220e-04
Loss = 5.3912e-01, PNorm = 43.2383, GNorm = 2.1824, lr_0 = 9.1157e-04
Loss = 5.1715e-01, PNorm = 43.2576, GNorm = 2.1079, lr_0 = 9.1095e-04
Loss = 5.1305e-01, PNorm = 43.2749, GNorm = 3.4462, lr_0 = 9.1032e-04
Loss = 5.6248e-01, PNorm = 43.3013, GNorm = 1.4527, lr_0 = 9.0970e-04
Loss = 4.8944e-01, PNorm = 43.3371, GNorm = 1.9957, lr_0 = 9.0908e-04
Loss = 5.3747e-01, PNorm = 43.3621, GNorm = 1.5762, lr_0 = 9.0846e-04
Loss = 5.7079e-01, PNorm = 43.3766, GNorm = 5.5356, lr_0 = 9.0783e-04
Loss = 5.0904e-01, PNorm = 43.3905, GNorm = 2.0242, lr_0 = 9.0721e-04
Loss = 5.5970e-01, PNorm = 43.4129, GNorm = 1.6182, lr_0 = 9.0659e-04
Loss = 5.2322e-01, PNorm = 43.4437, GNorm = 1.7086, lr_0 = 9.0597e-04
Loss = 5.3457e-01, PNorm = 43.4555, GNorm = 1.2209, lr_0 = 9.0535e-04
Loss = 6.3094e-01, PNorm = 43.4739, GNorm = 2.7590, lr_0 = 9.0473e-04
Loss = 4.2980e-01, PNorm = 43.5035, GNorm = 1.3844, lr_0 = 9.0411e-04
Loss = 4.8753e-01, PNorm = 43.5352, GNorm = 2.5948, lr_0 = 9.0349e-04
Loss = 5.7409e-01, PNorm = 43.5617, GNorm = 2.4317, lr_0 = 9.0287e-04
Loss = 4.8621e-01, PNorm = 43.5779, GNorm = 2.8570, lr_0 = 9.0225e-04
Loss = 5.2573e-01, PNorm = 43.5977, GNorm = 1.8733, lr_0 = 9.0163e-04
Loss = 4.6266e-01, PNorm = 43.6134, GNorm = 1.0696, lr_0 = 9.0102e-04
Loss = 5.8485e-01, PNorm = 43.6314, GNorm = 1.7516, lr_0 = 9.0040e-04
Loss = 5.3843e-01, PNorm = 43.6422, GNorm = 2.3529, lr_0 = 8.9978e-04
Loss = 5.0252e-01, PNorm = 43.6643, GNorm = 1.1934, lr_0 = 8.9916e-04
Loss = 5.3432e-01, PNorm = 43.6780, GNorm = 2.0508, lr_0 = 8.9855e-04
Loss = 4.9816e-01, PNorm = 43.7040, GNorm = 1.9465, lr_0 = 8.9793e-04
Loss = 4.6834e-01, PNorm = 43.7165, GNorm = 1.3992, lr_0 = 8.9732e-04
Loss = 4.7954e-01, PNorm = 43.7351, GNorm = 1.5516, lr_0 = 8.9670e-04
Loss = 4.9722e-01, PNorm = 43.7499, GNorm = 2.6093, lr_0 = 8.9609e-04
Loss = 5.2167e-01, PNorm = 43.7660, GNorm = 1.2921, lr_0 = 8.9548e-04
Loss = 5.0838e-01, PNorm = 43.7917, GNorm = 1.5179, lr_0 = 8.9486e-04
Loss = 5.1357e-01, PNorm = 43.8075, GNorm = 1.4844, lr_0 = 8.9425e-04
Loss = 5.4255e-01, PNorm = 43.8238, GNorm = 1.4799, lr_0 = 8.9364e-04
Loss = 5.1097e-01, PNorm = 43.8474, GNorm = 2.9038, lr_0 = 8.9302e-04
Loss = 5.6599e-01, PNorm = 43.8749, GNorm = 0.9319, lr_0 = 8.9241e-04
Loss = 5.0671e-01, PNorm = 43.8909, GNorm = 1.9148, lr_0 = 8.9180e-04
Loss = 5.3512e-01, PNorm = 43.9042, GNorm = 1.1554, lr_0 = 8.9119e-04
Loss = 5.4404e-01, PNorm = 43.9157, GNorm = 1.9001, lr_0 = 8.9058e-04
Loss = 5.4822e-01, PNorm = 43.9352, GNorm = 1.2214, lr_0 = 8.8997e-04
Loss = 5.5150e-01, PNorm = 43.9598, GNorm = 1.7447, lr_0 = 8.8936e-04
Loss = 4.9684e-01, PNorm = 43.9763, GNorm = 1.3749, lr_0 = 8.8875e-04
Loss = 4.8269e-01, PNorm = 43.9863, GNorm = 1.3759, lr_0 = 8.8814e-04
Loss = 5.1843e-01, PNorm = 43.9993, GNorm = 1.0874, lr_0 = 8.8753e-04
Loss = 4.5120e-01, PNorm = 44.0135, GNorm = 1.3092, lr_0 = 8.8693e-04
Loss = 5.2614e-01, PNorm = 44.0272, GNorm = 2.2840, lr_0 = 8.8632e-04
Loss = 5.1538e-01, PNorm = 44.0450, GNorm = 1.8150, lr_0 = 8.8571e-04
Loss = 5.2924e-01, PNorm = 44.0646, GNorm = 0.9336, lr_0 = 8.8510e-04
Loss = 4.3927e-01, PNorm = 44.0881, GNorm = 1.1778, lr_0 = 8.8450e-04
Loss = 6.2224e-01, PNorm = 44.1192, GNorm = 2.4616, lr_0 = 8.8389e-04
Loss = 5.7188e-01, PNorm = 44.1423, GNorm = 2.0391, lr_0 = 8.8329e-04
Loss = 6.0236e-01, PNorm = 44.1800, GNorm = 1.3965, lr_0 = 8.8268e-04
Loss = 5.4292e-01, PNorm = 44.1996, GNorm = 1.5481, lr_0 = 8.8208e-04
Loss = 4.6071e-01, PNorm = 44.2198, GNorm = 2.0166, lr_0 = 8.8147e-04
Loss = 5.2172e-01, PNorm = 44.2318, GNorm = 1.5194, lr_0 = 8.8087e-04
Loss = 5.2138e-01, PNorm = 44.2436, GNorm = 1.4916, lr_0 = 8.8026e-04
Loss = 4.6265e-01, PNorm = 44.2634, GNorm = 2.0622, lr_0 = 8.7966e-04
Loss = 4.6762e-01, PNorm = 44.2725, GNorm = 1.2127, lr_0 = 8.7906e-04
Loss = 4.5268e-01, PNorm = 44.2907, GNorm = 1.6399, lr_0 = 8.7846e-04
Loss = 4.9195e-01, PNorm = 44.3095, GNorm = 1.0547, lr_0 = 8.7785e-04
Loss = 5.4702e-01, PNorm = 44.3229, GNorm = 1.0985, lr_0 = 8.7725e-04
Loss = 5.6172e-01, PNorm = 44.3496, GNorm = 1.5635, lr_0 = 8.7665e-04
Loss = 4.5959e-01, PNorm = 44.3700, GNorm = 1.0908, lr_0 = 8.7605e-04
Loss = 4.2510e-01, PNorm = 44.3925, GNorm = 2.2164, lr_0 = 8.7545e-04
Loss = 4.9871e-01, PNorm = 44.4158, GNorm = 1.2033, lr_0 = 8.7485e-04
Loss = 5.4647e-01, PNorm = 44.4336, GNorm = 2.9409, lr_0 = 8.7425e-04
Loss = 6.2434e-01, PNorm = 44.4448, GNorm = 1.4129, lr_0 = 8.7365e-04
Loss = 4.8591e-01, PNorm = 44.4688, GNorm = 1.1331, lr_0 = 8.7306e-04
Loss = 4.8131e-01, PNorm = 44.4893, GNorm = 2.4524, lr_0 = 8.7246e-04
Loss = 5.2403e-01, PNorm = 44.5051, GNorm = 4.2521, lr_0 = 8.7186e-04
Loss = 5.0797e-01, PNorm = 44.5314, GNorm = 2.1109, lr_0 = 8.7126e-04
Loss = 5.8920e-01, PNorm = 44.5538, GNorm = 1.7732, lr_0 = 8.7067e-04
Loss = 4.9696e-01, PNorm = 44.5719, GNorm = 1.1706, lr_0 = 8.7007e-04
Loss = 4.6262e-01, PNorm = 44.5874, GNorm = 1.1265, lr_0 = 8.6947e-04
Loss = 5.0982e-01, PNorm = 44.6060, GNorm = 1.4101, lr_0 = 8.6888e-04
Loss = 4.3510e-01, PNorm = 44.6245, GNorm = 0.9537, lr_0 = 8.6828e-04
Loss = 5.5726e-01, PNorm = 44.6352, GNorm = 1.3832, lr_0 = 8.6769e-04
Loss = 4.2336e-01, PNorm = 44.6486, GNorm = 3.7495, lr_0 = 8.6709e-04
Loss = 4.7237e-01, PNorm = 44.6613, GNorm = 1.9204, lr_0 = 8.6650e-04
Loss = 4.4269e-01, PNorm = 44.6758, GNorm = 2.5565, lr_0 = 8.6590e-04
Loss = 4.9140e-01, PNorm = 44.6946, GNorm = 2.3076, lr_0 = 8.6531e-04
Loss = 5.1031e-01, PNorm = 44.7149, GNorm = 1.5315, lr_0 = 8.6472e-04
Loss = 5.0933e-01, PNorm = 44.7234, GNorm = 1.0928, lr_0 = 8.6413e-04
Loss = 5.3696e-01, PNorm = 44.7325, GNorm = 0.9788, lr_0 = 8.6353e-04
Loss = 4.9189e-01, PNorm = 44.7448, GNorm = 1.7596, lr_0 = 8.6294e-04
Loss = 5.0231e-01, PNorm = 44.7614, GNorm = 3.0104, lr_0 = 8.6235e-04
Loss = 5.6159e-01, PNorm = 44.7761, GNorm = 1.5685, lr_0 = 8.6176e-04
Loss = 4.2454e-01, PNorm = 44.7922, GNorm = 0.9526, lr_0 = 8.6117e-04
Loss = 4.7146e-01, PNorm = 44.8135, GNorm = 1.4599, lr_0 = 8.6058e-04
Loss = 5.6197e-01, PNorm = 44.8442, GNorm = 2.1621, lr_0 = 8.5999e-04
Loss = 4.8138e-01, PNorm = 44.8671, GNorm = 2.2401, lr_0 = 8.5940e-04
Loss = 4.6241e-01, PNorm = 44.8817, GNorm = 0.7593, lr_0 = 8.5881e-04
Loss = 4.4550e-01, PNorm = 44.9009, GNorm = 2.9316, lr_0 = 8.5823e-04
Loss = 4.5634e-01, PNorm = 44.9146, GNorm = 1.4075, lr_0 = 8.5764e-04
Loss = 4.4723e-01, PNorm = 44.9261, GNorm = 1.3257, lr_0 = 8.5705e-04
Loss = 5.1886e-01, PNorm = 44.9421, GNorm = 1.0826, lr_0 = 8.5646e-04
Loss = 5.7832e-01, PNorm = 44.9690, GNorm = 1.2836, lr_0 = 8.5588e-04
Loss = 5.0537e-01, PNorm = 44.9892, GNorm = 1.0976, lr_0 = 8.5529e-04
Loss = 5.3207e-01, PNorm = 45.0058, GNorm = 2.4474, lr_0 = 8.5470e-04
Loss = 4.9547e-01, PNorm = 45.0238, GNorm = 1.6649, lr_0 = 8.5412e-04
Loss = 4.5694e-01, PNorm = 45.0387, GNorm = 1.5995, lr_0 = 8.5353e-04
Loss = 5.1185e-01, PNorm = 45.0534, GNorm = 0.8708, lr_0 = 8.5295e-04
Loss = 5.1506e-01, PNorm = 45.0623, GNorm = 1.4249, lr_0 = 8.5236e-04
Loss = 5.2011e-01, PNorm = 45.0772, GNorm = 1.3406, lr_0 = 8.5178e-04
Loss = 4.9105e-01, PNorm = 45.0969, GNorm = 1.3569, lr_0 = 8.5120e-04
Loss = 5.2212e-01, PNorm = 45.1063, GNorm = 1.3341, lr_0 = 8.5061e-04
Loss = 4.7263e-01, PNorm = 45.1177, GNorm = 1.2191, lr_0 = 8.5003e-04
Loss = 5.8845e-01, PNorm = 45.1305, GNorm = 2.8804, lr_0 = 8.4945e-04
Loss = 4.8705e-01, PNorm = 45.1497, GNorm = 1.2554, lr_0 = 8.4887e-04
Loss = 4.7902e-01, PNorm = 45.1748, GNorm = 1.2363, lr_0 = 8.4828e-04
Validation mae = 0.124881
Epoch 4
Loss = 4.4373e-01, PNorm = 45.1982, GNorm = 1.2904, lr_0 = 8.4770e-04
Loss = 5.1418e-01, PNorm = 45.2100, GNorm = 2.1124, lr_0 = 8.4712e-04
Loss = 5.1201e-01, PNorm = 45.2306, GNorm = 2.0340, lr_0 = 8.4654e-04
Loss = 5.0033e-01, PNorm = 45.2528, GNorm = 1.0191, lr_0 = 8.4596e-04
Loss = 5.4936e-01, PNorm = 45.2745, GNorm = 3.0513, lr_0 = 8.4538e-04
Loss = 4.6404e-01, PNorm = 45.2973, GNorm = 2.0536, lr_0 = 8.4480e-04
Loss = 5.8624e-01, PNorm = 45.3297, GNorm = 2.1376, lr_0 = 8.4423e-04
Loss = 5.3288e-01, PNorm = 45.3487, GNorm = 1.3266, lr_0 = 8.4365e-04
Loss = 5.1901e-01, PNorm = 45.3674, GNorm = 1.7924, lr_0 = 8.4307e-04
Loss = 4.9399e-01, PNorm = 45.3870, GNorm = 1.0440, lr_0 = 8.4249e-04
Loss = 5.3542e-01, PNorm = 45.4091, GNorm = 2.6229, lr_0 = 8.4191e-04
Loss = 4.2044e-01, PNorm = 45.4322, GNorm = 1.4442, lr_0 = 8.4134e-04
Loss = 5.2516e-01, PNorm = 45.4454, GNorm = 2.6793, lr_0 = 8.4076e-04
Loss = 5.1268e-01, PNorm = 45.4591, GNorm = 2.0639, lr_0 = 8.4019e-04
Loss = 4.9957e-01, PNorm = 45.4835, GNorm = 1.1867, lr_0 = 8.3961e-04
Loss = 4.8674e-01, PNorm = 45.5003, GNorm = 1.2588, lr_0 = 8.3903e-04
Loss = 4.8360e-01, PNorm = 45.5161, GNorm = 1.5774, lr_0 = 8.3846e-04
Loss = 4.7674e-01, PNorm = 45.5384, GNorm = 2.6394, lr_0 = 8.3789e-04
Loss = 5.1548e-01, PNorm = 45.5525, GNorm = 0.9677, lr_0 = 8.3731e-04
Loss = 4.8078e-01, PNorm = 45.5719, GNorm = 1.1825, lr_0 = 8.3674e-04
Loss = 5.3316e-01, PNorm = 45.5911, GNorm = 2.0703, lr_0 = 8.3616e-04
Loss = 4.6276e-01, PNorm = 45.6122, GNorm = 1.2425, lr_0 = 8.3559e-04
Loss = 5.3734e-01, PNorm = 45.6353, GNorm = 1.4757, lr_0 = 8.3502e-04
Loss = 4.7798e-01, PNorm = 45.6593, GNorm = 1.2002, lr_0 = 8.3445e-04
Loss = 5.1855e-01, PNorm = 45.6811, GNorm = 1.5611, lr_0 = 8.3388e-04
Loss = 4.6490e-01, PNorm = 45.6989, GNorm = 2.0472, lr_0 = 8.3330e-04
Loss = 5.3789e-01, PNorm = 45.7075, GNorm = 1.0109, lr_0 = 8.3273e-04
Loss = 4.2322e-01, PNorm = 45.7204, GNorm = 1.1481, lr_0 = 8.3216e-04
Loss = 4.8556e-01, PNorm = 45.7398, GNorm = 1.4419, lr_0 = 8.3159e-04
Loss = 5.2992e-01, PNorm = 45.7583, GNorm = 2.8640, lr_0 = 8.3102e-04
Loss = 5.2160e-01, PNorm = 45.7878, GNorm = 1.1271, lr_0 = 8.3045e-04
Loss = 4.9403e-01, PNorm = 45.8091, GNorm = 1.9679, lr_0 = 8.2988e-04
Loss = 4.7947e-01, PNorm = 45.8295, GNorm = 2.4310, lr_0 = 8.2932e-04
Loss = 5.0558e-01, PNorm = 45.8453, GNorm = 1.9296, lr_0 = 8.2875e-04
Loss = 4.9407e-01, PNorm = 45.8619, GNorm = 0.9378, lr_0 = 8.2818e-04
Loss = 4.5713e-01, PNorm = 45.8812, GNorm = 1.6885, lr_0 = 8.2761e-04
Loss = 5.9496e-01, PNorm = 45.8968, GNorm = 0.8588, lr_0 = 8.2705e-04
Loss = 5.2996e-01, PNorm = 45.9101, GNorm = 1.2480, lr_0 = 8.2648e-04
Loss = 4.7290e-01, PNorm = 45.9295, GNorm = 1.2850, lr_0 = 8.2591e-04
Loss = 4.9126e-01, PNorm = 45.9420, GNorm = 1.3803, lr_0 = 8.2535e-04
Loss = 4.9259e-01, PNorm = 45.9633, GNorm = 2.9065, lr_0 = 8.2478e-04
Loss = 4.7370e-01, PNorm = 45.9861, GNorm = 1.7099, lr_0 = 8.2422e-04
Loss = 4.4565e-01, PNorm = 46.0079, GNorm = 1.1561, lr_0 = 8.2365e-04
Loss = 5.4090e-01, PNorm = 46.0275, GNorm = 2.2596, lr_0 = 8.2309e-04
Loss = 4.9885e-01, PNorm = 46.0460, GNorm = 1.0411, lr_0 = 8.2252e-04
Loss = 4.8932e-01, PNorm = 46.0746, GNorm = 1.6482, lr_0 = 8.2196e-04
Loss = 4.7304e-01, PNorm = 46.0899, GNorm = 1.7782, lr_0 = 8.2140e-04
Loss = 5.5423e-01, PNorm = 46.0986, GNorm = 2.9447, lr_0 = 8.2084e-04
Loss = 5.1897e-01, PNorm = 46.1236, GNorm = 0.9976, lr_0 = 8.2027e-04
Loss = 5.1517e-01, PNorm = 46.1481, GNorm = 2.4025, lr_0 = 8.1971e-04
Loss = 5.1705e-01, PNorm = 46.1693, GNorm = 1.3748, lr_0 = 8.1915e-04
Loss = 4.4830e-01, PNorm = 46.1870, GNorm = 1.5027, lr_0 = 8.1859e-04
Loss = 4.3576e-01, PNorm = 46.2025, GNorm = 1.3049, lr_0 = 8.1803e-04
Loss = 5.4161e-01, PNorm = 46.2225, GNorm = 1.0035, lr_0 = 8.1747e-04
Loss = 5.0025e-01, PNorm = 46.2354, GNorm = 1.4631, lr_0 = 8.1691e-04
Loss = 4.7192e-01, PNorm = 46.2521, GNorm = 1.5298, lr_0 = 8.1635e-04
Loss = 4.8626e-01, PNorm = 46.2728, GNorm = 1.7928, lr_0 = 8.1579e-04
Loss = 4.5761e-01, PNorm = 46.2906, GNorm = 1.4395, lr_0 = 8.1523e-04
Loss = 4.7456e-01, PNorm = 46.3061, GNorm = 1.4942, lr_0 = 8.1467e-04
Loss = 4.8305e-01, PNorm = 46.3252, GNorm = 2.3531, lr_0 = 8.1411e-04
Loss = 4.8085e-01, PNorm = 46.3411, GNorm = 2.1920, lr_0 = 8.1355e-04
Loss = 5.3162e-01, PNorm = 46.3561, GNorm = 1.6618, lr_0 = 8.1300e-04
Loss = 4.3378e-01, PNorm = 46.3730, GNorm = 1.0643, lr_0 = 8.1244e-04
Loss = 4.5767e-01, PNorm = 46.3896, GNorm = 1.0680, lr_0 = 8.1188e-04
Loss = 5.0615e-01, PNorm = 46.4056, GNorm = 2.7303, lr_0 = 8.1133e-04
Loss = 4.4849e-01, PNorm = 46.4163, GNorm = 1.4099, lr_0 = 8.1077e-04
Loss = 4.8846e-01, PNorm = 46.4362, GNorm = 1.9533, lr_0 = 8.1022e-04
Loss = 5.0396e-01, PNorm = 46.4536, GNorm = 1.5163, lr_0 = 8.0966e-04
Loss = 3.9892e-01, PNorm = 46.4779, GNorm = 1.4040, lr_0 = 8.0911e-04
Loss = 5.1196e-01, PNorm = 46.4962, GNorm = 1.9281, lr_0 = 8.0855e-04
Loss = 4.5757e-01, PNorm = 46.5072, GNorm = 0.9675, lr_0 = 8.0800e-04
Loss = 4.4673e-01, PNorm = 46.5240, GNorm = 2.1259, lr_0 = 8.0745e-04
Loss = 5.3920e-01, PNorm = 46.5468, GNorm = 1.3404, lr_0 = 8.0689e-04
Loss = 5.6555e-01, PNorm = 46.5681, GNorm = 3.3827, lr_0 = 8.0634e-04
Loss = 5.7393e-01, PNorm = 46.5847, GNorm = 1.6642, lr_0 = 8.0579e-04
Loss = 4.9429e-01, PNorm = 46.5997, GNorm = 0.9540, lr_0 = 8.0523e-04
Loss = 5.4647e-01, PNorm = 46.6205, GNorm = 2.1696, lr_0 = 8.0468e-04
Loss = 4.5116e-01, PNorm = 46.6448, GNorm = 1.3315, lr_0 = 8.0413e-04
Loss = 4.3709e-01, PNorm = 46.6592, GNorm = 2.6767, lr_0 = 8.0358e-04
Loss = 4.2760e-01, PNorm = 46.6791, GNorm = 1.2325, lr_0 = 8.0303e-04
Loss = 4.8043e-01, PNorm = 46.6956, GNorm = 1.5628, lr_0 = 8.0248e-04
Loss = 4.7528e-01, PNorm = 46.7063, GNorm = 2.2713, lr_0 = 8.0193e-04
Loss = 4.1557e-01, PNorm = 46.7203, GNorm = 0.9902, lr_0 = 8.0138e-04
Loss = 4.7946e-01, PNorm = 46.7444, GNorm = 1.2956, lr_0 = 8.0083e-04
Loss = 4.7213e-01, PNorm = 46.7628, GNorm = 1.9758, lr_0 = 8.0028e-04
Loss = 4.8619e-01, PNorm = 46.7821, GNorm = 1.4404, lr_0 = 7.9974e-04
Loss = 4.5121e-01, PNorm = 46.7959, GNorm = 1.5359, lr_0 = 7.9919e-04
Loss = 4.8404e-01, PNorm = 46.8119, GNorm = 1.0607, lr_0 = 7.9864e-04
Loss = 4.9792e-01, PNorm = 46.8331, GNorm = 1.4898, lr_0 = 7.9809e-04
Loss = 4.3158e-01, PNorm = 46.8493, GNorm = 2.6842, lr_0 = 7.9755e-04
Loss = 5.1147e-01, PNorm = 46.8649, GNorm = 1.1077, lr_0 = 7.9700e-04
Loss = 5.1087e-01, PNorm = 46.8746, GNorm = 1.3204, lr_0 = 7.9645e-04
Loss = 4.2757e-01, PNorm = 46.8840, GNorm = 1.7173, lr_0 = 7.9591e-04
Loss = 5.1156e-01, PNorm = 46.9006, GNorm = 0.9995, lr_0 = 7.9536e-04
Loss = 5.0064e-01, PNorm = 46.9169, GNorm = 1.7007, lr_0 = 7.9482e-04
Loss = 4.4883e-01, PNorm = 46.9294, GNorm = 1.1668, lr_0 = 7.9427e-04
Loss = 4.3329e-01, PNorm = 46.9483, GNorm = 1.2820, lr_0 = 7.9373e-04
Loss = 4.2045e-01, PNorm = 46.9603, GNorm = 1.1931, lr_0 = 7.9319e-04
Loss = 5.4485e-01, PNorm = 46.9814, GNorm = 1.7177, lr_0 = 7.9264e-04
Loss = 4.7828e-01, PNorm = 47.0077, GNorm = 2.1849, lr_0 = 7.9210e-04
Loss = 4.5427e-01, PNorm = 47.0199, GNorm = 1.3366, lr_0 = 7.9156e-04
Loss = 5.2827e-01, PNorm = 47.0416, GNorm = 1.1812, lr_0 = 7.9101e-04
Loss = 6.0832e-01, PNorm = 47.0678, GNorm = 2.0111, lr_0 = 7.9047e-04
Loss = 4.4137e-01, PNorm = 47.0894, GNorm = 1.3021, lr_0 = 7.8993e-04
Loss = 4.9170e-01, PNorm = 47.0998, GNorm = 1.7503, lr_0 = 7.8939e-04
Loss = 4.9554e-01, PNorm = 47.1097, GNorm = 1.1056, lr_0 = 7.8885e-04
Loss = 4.6693e-01, PNorm = 47.1263, GNorm = 1.3565, lr_0 = 7.8831e-04
Loss = 4.5197e-01, PNorm = 47.1500, GNorm = 2.9686, lr_0 = 7.8777e-04
Loss = 5.0032e-01, PNorm = 47.1714, GNorm = 2.0267, lr_0 = 7.8723e-04
Loss = 4.7192e-01, PNorm = 47.1871, GNorm = 1.4435, lr_0 = 7.8669e-04
Loss = 4.1762e-01, PNorm = 47.2034, GNorm = 1.2335, lr_0 = 7.8615e-04
Loss = 4.7788e-01, PNorm = 47.2267, GNorm = 1.4660, lr_0 = 7.8561e-04
Loss = 4.8825e-01, PNorm = 47.2399, GNorm = 2.3613, lr_0 = 7.8507e-04
Loss = 5.1625e-01, PNorm = 47.2648, GNorm = 1.9768, lr_0 = 7.8454e-04
Loss = 5.6419e-01, PNorm = 47.2874, GNorm = 1.4721, lr_0 = 7.8400e-04
Loss = 5.0955e-01, PNorm = 47.3061, GNorm = 2.5710, lr_0 = 7.8346e-04
Loss = 4.4854e-01, PNorm = 47.3150, GNorm = 1.2243, lr_0 = 7.8293e-04
Loss = 4.5865e-01, PNorm = 47.3350, GNorm = 1.7172, lr_0 = 7.8239e-04
Loss = 4.5111e-01, PNorm = 47.3616, GNorm = 3.0427, lr_0 = 7.8185e-04
Loss = 4.4893e-01, PNorm = 47.3827, GNorm = 1.6522, lr_0 = 7.8132e-04
Validation mae = 0.124018
Epoch 5
Loss = 3.9652e-01, PNorm = 47.3972, GNorm = 1.2881, lr_0 = 7.8078e-04
Loss = 4.7602e-01, PNorm = 47.4172, GNorm = 0.9615, lr_0 = 7.8025e-04
Loss = 4.9416e-01, PNorm = 47.4394, GNorm = 1.5716, lr_0 = 7.7971e-04
Loss = 4.9676e-01, PNorm = 47.4553, GNorm = 1.5432, lr_0 = 7.7918e-04
Loss = 5.0332e-01, PNorm = 47.4661, GNorm = 1.7172, lr_0 = 7.7864e-04
Loss = 4.2742e-01, PNorm = 47.4843, GNorm = 1.5280, lr_0 = 7.7811e-04
Loss = 4.2051e-01, PNorm = 47.5011, GNorm = 1.9456, lr_0 = 7.7758e-04
Loss = 4.7290e-01, PNorm = 47.5189, GNorm = 1.1508, lr_0 = 7.7705e-04
Loss = 4.5298e-01, PNorm = 47.5465, GNorm = 1.4154, lr_0 = 7.7651e-04
Loss = 4.2689e-01, PNorm = 47.5688, GNorm = 1.3535, lr_0 = 7.7598e-04
Loss = 5.6120e-01, PNorm = 47.5888, GNorm = 2.6663, lr_0 = 7.7545e-04
Loss = 4.9328e-01, PNorm = 47.6139, GNorm = 1.7773, lr_0 = 7.7492e-04
Loss = 4.1792e-01, PNorm = 47.6208, GNorm = 1.0703, lr_0 = 7.7439e-04
Loss = 4.4229e-01, PNorm = 47.6348, GNorm = 2.7441, lr_0 = 7.7386e-04
Loss = 4.5267e-01, PNorm = 47.6548, GNorm = 1.1841, lr_0 = 7.7333e-04
Loss = 5.4758e-01, PNorm = 47.6740, GNorm = 2.3677, lr_0 = 7.7280e-04
Loss = 4.8772e-01, PNorm = 47.6923, GNorm = 1.1873, lr_0 = 7.7227e-04
Loss = 4.5701e-01, PNorm = 47.7054, GNorm = 1.0945, lr_0 = 7.7174e-04
Loss = 4.3497e-01, PNorm = 47.7253, GNorm = 1.1230, lr_0 = 7.7121e-04
Loss = 5.1632e-01, PNorm = 47.7486, GNorm = 1.5609, lr_0 = 7.7068e-04
Loss = 4.9903e-01, PNorm = 47.7774, GNorm = 1.1253, lr_0 = 7.7015e-04
Loss = 5.1458e-01, PNorm = 47.8012, GNorm = 1.2217, lr_0 = 7.6963e-04
Loss = 5.0560e-01, PNorm = 47.8314, GNorm = 1.4956, lr_0 = 7.6910e-04
Loss = 4.0611e-01, PNorm = 47.8408, GNorm = 1.3608, lr_0 = 7.6857e-04
Loss = 4.5684e-01, PNorm = 47.8447, GNorm = 1.0679, lr_0 = 7.6805e-04
Loss = 4.8619e-01, PNorm = 47.8597, GNorm = 1.7531, lr_0 = 7.6752e-04
Loss = 4.7329e-01, PNorm = 47.8800, GNorm = 1.4422, lr_0 = 7.6699e-04
Loss = 4.5506e-01, PNorm = 47.8997, GNorm = 3.5627, lr_0 = 7.6647e-04
Loss = 4.9471e-01, PNorm = 47.9218, GNorm = 1.0582, lr_0 = 7.6594e-04
Loss = 4.2517e-01, PNorm = 47.9360, GNorm = 1.3920, lr_0 = 7.6542e-04
Loss = 4.1232e-01, PNorm = 47.9488, GNorm = 0.8125, lr_0 = 7.6489e-04
Loss = 3.8440e-01, PNorm = 47.9574, GNorm = 1.6389, lr_0 = 7.6437e-04
Loss = 4.9666e-01, PNorm = 47.9660, GNorm = 1.5924, lr_0 = 7.6385e-04
Loss = 4.7610e-01, PNorm = 47.9825, GNorm = 0.9314, lr_0 = 7.6332e-04
Loss = 4.2730e-01, PNorm = 47.9984, GNorm = 1.3982, lr_0 = 7.6280e-04
Loss = 4.6596e-01, PNorm = 48.0071, GNorm = 1.5594, lr_0 = 7.6228e-04
Loss = 5.1012e-01, PNorm = 48.0186, GNorm = 1.5924, lr_0 = 7.6176e-04
Loss = 4.8232e-01, PNorm = 48.0408, GNorm = 1.3217, lr_0 = 7.6123e-04
Loss = 4.0098e-01, PNorm = 48.0599, GNorm = 1.2396, lr_0 = 7.6071e-04
Loss = 4.9851e-01, PNorm = 48.0768, GNorm = 2.2666, lr_0 = 7.6019e-04
Loss = 4.7456e-01, PNorm = 48.0991, GNorm = 1.5119, lr_0 = 7.5967e-04
Loss = 5.4345e-01, PNorm = 48.1150, GNorm = 1.8831, lr_0 = 7.5915e-04
Loss = 4.9276e-01, PNorm = 48.1351, GNorm = 1.2010, lr_0 = 7.5863e-04
Loss = 4.8475e-01, PNorm = 48.1507, GNorm = 1.7649, lr_0 = 7.5811e-04
Loss = 4.5878e-01, PNorm = 48.1664, GNorm = 0.9329, lr_0 = 7.5759e-04
Loss = 4.7009e-01, PNorm = 48.1814, GNorm = 1.1176, lr_0 = 7.5707e-04
Loss = 3.9734e-01, PNorm = 48.1984, GNorm = 1.1688, lr_0 = 7.5655e-04
Loss = 4.7838e-01, PNorm = 48.2134, GNorm = 2.0916, lr_0 = 7.5603e-04
Loss = 4.4895e-01, PNorm = 48.2303, GNorm = 1.5357, lr_0 = 7.5552e-04
Loss = 4.4881e-01, PNorm = 48.2446, GNorm = 1.5336, lr_0 = 7.5500e-04
Loss = 4.9622e-01, PNorm = 48.2559, GNorm = 1.4688, lr_0 = 7.5448e-04
Loss = 3.7930e-01, PNorm = 48.2735, GNorm = 2.1076, lr_0 = 7.5397e-04
Loss = 4.7553e-01, PNorm = 48.2863, GNorm = 1.3347, lr_0 = 7.5345e-04
Loss = 4.7660e-01, PNorm = 48.2994, GNorm = 1.2111, lr_0 = 7.5293e-04
Loss = 4.6561e-01, PNorm = 48.3191, GNorm = 1.2477, lr_0 = 7.5242e-04
Loss = 4.7839e-01, PNorm = 48.3363, GNorm = 1.6393, lr_0 = 7.5190e-04
Loss = 4.9239e-01, PNorm = 48.3568, GNorm = 1.6394, lr_0 = 7.5139e-04
Loss = 4.9569e-01, PNorm = 48.3757, GNorm = 2.1646, lr_0 = 7.5087e-04
Loss = 4.7207e-01, PNorm = 48.3945, GNorm = 1.7643, lr_0 = 7.5036e-04
Loss = 4.9998e-01, PNorm = 48.4135, GNorm = 1.4756, lr_0 = 7.4984e-04
Loss = 4.8625e-01, PNorm = 48.4293, GNorm = 1.2393, lr_0 = 7.4933e-04
Loss = 4.3082e-01, PNorm = 48.4509, GNorm = 1.2917, lr_0 = 7.4882e-04
Loss = 5.5365e-01, PNorm = 48.4682, GNorm = 2.8701, lr_0 = 7.4830e-04
Loss = 5.4240e-01, PNorm = 48.4841, GNorm = 1.7098, lr_0 = 7.4779e-04
Loss = 4.6093e-01, PNorm = 48.5046, GNorm = 1.1053, lr_0 = 7.4728e-04
Loss = 5.1096e-01, PNorm = 48.5178, GNorm = 1.4606, lr_0 = 7.4677e-04
Loss = 5.0333e-01, PNorm = 48.5250, GNorm = 1.3043, lr_0 = 7.4625e-04
Loss = 5.0993e-01, PNorm = 48.5392, GNorm = 1.0401, lr_0 = 7.4574e-04
Loss = 3.9479e-01, PNorm = 48.5540, GNorm = 1.0525, lr_0 = 7.4523e-04
Loss = 4.4761e-01, PNorm = 48.5634, GNorm = 1.4854, lr_0 = 7.4472e-04
Loss = 4.6119e-01, PNorm = 48.5746, GNorm = 1.2105, lr_0 = 7.4421e-04
Loss = 5.4343e-01, PNorm = 48.5978, GNorm = 1.2484, lr_0 = 7.4370e-04
Loss = 4.0599e-01, PNorm = 48.6145, GNorm = 1.2178, lr_0 = 7.4319e-04
Loss = 4.6317e-01, PNorm = 48.6313, GNorm = 2.0941, lr_0 = 7.4268e-04
Loss = 4.4891e-01, PNorm = 48.6406, GNorm = 1.6482, lr_0 = 7.4217e-04
Loss = 4.5690e-01, PNorm = 48.6599, GNorm = 1.8387, lr_0 = 7.4167e-04
Loss = 4.4904e-01, PNorm = 48.6730, GNorm = 0.9073, lr_0 = 7.4116e-04
Loss = 4.4537e-01, PNorm = 48.6883, GNorm = 2.2511, lr_0 = 7.4065e-04
Loss = 4.9241e-01, PNorm = 48.7140, GNorm = 1.2629, lr_0 = 7.4014e-04
Loss = 5.6621e-01, PNorm = 48.7369, GNorm = 1.6339, lr_0 = 7.3964e-04
Loss = 5.3132e-01, PNorm = 48.7554, GNorm = 1.4116, lr_0 = 7.3913e-04
Loss = 4.8422e-01, PNorm = 48.7782, GNorm = 1.6473, lr_0 = 7.3862e-04
Loss = 4.3803e-01, PNorm = 48.8012, GNorm = 1.1034, lr_0 = 7.3812e-04
Loss = 4.2543e-01, PNorm = 48.8122, GNorm = 1.3078, lr_0 = 7.3761e-04
Loss = 5.1586e-01, PNorm = 48.8266, GNorm = 1.9050, lr_0 = 7.3711e-04
Loss = 4.3780e-01, PNorm = 48.8448, GNorm = 1.4023, lr_0 = 7.3660e-04
Loss = 4.3659e-01, PNorm = 48.8580, GNorm = 1.0635, lr_0 = 7.3610e-04
Loss = 5.4272e-01, PNorm = 48.8776, GNorm = 1.1088, lr_0 = 7.3559e-04
Loss = 4.7234e-01, PNorm = 48.8975, GNorm = 3.4695, lr_0 = 7.3509e-04
Loss = 4.6721e-01, PNorm = 48.9121, GNorm = 1.6310, lr_0 = 7.3458e-04
Loss = 5.1411e-01, PNorm = 48.9288, GNorm = 1.3441, lr_0 = 7.3408e-04
Loss = 4.7667e-01, PNorm = 48.9479, GNorm = 2.3073, lr_0 = 7.3358e-04
Loss = 4.6976e-01, PNorm = 48.9593, GNorm = 1.5062, lr_0 = 7.3308e-04
Loss = 5.0529e-01, PNorm = 48.9736, GNorm = 1.0473, lr_0 = 7.3257e-04
Loss = 5.4152e-01, PNorm = 48.9902, GNorm = 1.9105, lr_0 = 7.3207e-04
Loss = 5.0576e-01, PNorm = 49.0034, GNorm = 1.7798, lr_0 = 7.3157e-04
Loss = 4.9864e-01, PNorm = 49.0241, GNorm = 0.9188, lr_0 = 7.3107e-04
Loss = 4.9366e-01, PNorm = 49.0364, GNorm = 2.2858, lr_0 = 7.3057e-04
Loss = 4.3916e-01, PNorm = 49.0553, GNorm = 1.1289, lr_0 = 7.3007e-04
Loss = 4.2112e-01, PNorm = 49.0659, GNorm = 1.2846, lr_0 = 7.2957e-04
Loss = 5.1132e-01, PNorm = 49.0813, GNorm = 1.5859, lr_0 = 7.2907e-04
Loss = 4.4428e-01, PNorm = 49.1008, GNorm = 1.3031, lr_0 = 7.2857e-04
Loss = 4.9066e-01, PNorm = 49.1181, GNorm = 1.0701, lr_0 = 7.2807e-04
Loss = 4.3183e-01, PNorm = 49.1351, GNorm = 1.3432, lr_0 = 7.2757e-04
Loss = 4.2458e-01, PNorm = 49.1501, GNorm = 1.7686, lr_0 = 7.2707e-04
Loss = 3.7116e-01, PNorm = 49.1655, GNorm = 1.4222, lr_0 = 7.2657e-04
Loss = 4.2780e-01, PNorm = 49.1834, GNorm = 1.0488, lr_0 = 7.2608e-04
Loss = 4.6303e-01, PNorm = 49.2014, GNorm = 1.2219, lr_0 = 7.2558e-04
Loss = 4.6262e-01, PNorm = 49.2163, GNorm = 1.9172, lr_0 = 7.2508e-04
Loss = 4.5902e-01, PNorm = 49.2326, GNorm = 1.0233, lr_0 = 7.2458e-04
Loss = 4.5602e-01, PNorm = 49.2420, GNorm = 2.0345, lr_0 = 7.2409e-04
Loss = 4.7514e-01, PNorm = 49.2581, GNorm = 1.0719, lr_0 = 7.2359e-04
Loss = 4.1669e-01, PNorm = 49.2830, GNorm = 1.1440, lr_0 = 7.2310e-04
Loss = 4.6937e-01, PNorm = 49.2939, GNorm = 3.0025, lr_0 = 7.2260e-04
Loss = 4.7655e-01, PNorm = 49.3094, GNorm = 1.4758, lr_0 = 7.2211e-04
Loss = 4.8902e-01, PNorm = 49.3295, GNorm = 0.9649, lr_0 = 7.2161e-04
Loss = 4.4470e-01, PNorm = 49.3461, GNorm = 1.5844, lr_0 = 7.2112e-04
Loss = 5.0453e-01, PNorm = 49.3602, GNorm = 1.3028, lr_0 = 7.2062e-04
Loss = 4.5538e-01, PNorm = 49.3730, GNorm = 1.0456, lr_0 = 7.2013e-04
Loss = 4.6401e-01, PNorm = 49.3882, GNorm = 2.4640, lr_0 = 7.1964e-04
Validation mae = 0.121051
Epoch 6
Loss = 4.3784e-01, PNorm = 49.4074, GNorm = 1.2254, lr_0 = 7.1914e-04
Loss = 4.1760e-01, PNorm = 49.4272, GNorm = 1.6424, lr_0 = 7.1865e-04
Loss = 4.3978e-01, PNorm = 49.4383, GNorm = 1.3500, lr_0 = 7.1816e-04
Loss = 5.2926e-01, PNorm = 49.4576, GNorm = 1.3594, lr_0 = 7.1767e-04
Loss = 4.8703e-01, PNorm = 49.4717, GNorm = 1.2759, lr_0 = 7.1717e-04
Loss = 4.9556e-01, PNorm = 49.4917, GNorm = 1.8450, lr_0 = 7.1668e-04
Loss = 4.4445e-01, PNorm = 49.5187, GNorm = 1.7779, lr_0 = 7.1619e-04
Loss = 4.5138e-01, PNorm = 49.5442, GNorm = 1.3778, lr_0 = 7.1570e-04
Loss = 4.3794e-01, PNorm = 49.5586, GNorm = 1.1281, lr_0 = 7.1521e-04
Loss = 5.1856e-01, PNorm = 49.5718, GNorm = 1.5969, lr_0 = 7.1472e-04
Loss = 4.8580e-01, PNorm = 49.5855, GNorm = 1.9051, lr_0 = 7.1423e-04
Loss = 3.9317e-01, PNorm = 49.6016, GNorm = 1.0889, lr_0 = 7.1374e-04
Loss = 4.6986e-01, PNorm = 49.6112, GNorm = 2.0987, lr_0 = 7.1325e-04
Loss = 4.4114e-01, PNorm = 49.6248, GNorm = 1.6657, lr_0 = 7.1277e-04
Loss = 5.1951e-01, PNorm = 49.6420, GNorm = 1.6547, lr_0 = 7.1228e-04
Loss = 4.2590e-01, PNorm = 49.6575, GNorm = 1.1664, lr_0 = 7.1179e-04
Loss = 4.7932e-01, PNorm = 49.6691, GNorm = 1.2390, lr_0 = 7.1130e-04
Loss = 5.1339e-01, PNorm = 49.6815, GNorm = 1.6434, lr_0 = 7.1081e-04
Loss = 4.3970e-01, PNorm = 49.6913, GNorm = 1.1813, lr_0 = 7.1033e-04
Loss = 4.6389e-01, PNorm = 49.7043, GNorm = 0.9426, lr_0 = 7.0984e-04
Loss = 4.3761e-01, PNorm = 49.7162, GNorm = 1.4437, lr_0 = 7.0935e-04
Loss = 4.8967e-01, PNorm = 49.7311, GNorm = 1.1645, lr_0 = 7.0887e-04
Loss = 4.4086e-01, PNorm = 49.7538, GNorm = 1.7741, lr_0 = 7.0838e-04
Loss = 4.9336e-01, PNorm = 49.7744, GNorm = 1.1642, lr_0 = 7.0790e-04
Loss = 4.9612e-01, PNorm = 49.7933, GNorm = 1.2168, lr_0 = 7.0741e-04
Loss = 3.8899e-01, PNorm = 49.8048, GNorm = 1.0953, lr_0 = 7.0693e-04
Loss = 4.1513e-01, PNorm = 49.8174, GNorm = 1.3435, lr_0 = 7.0644e-04
Loss = 4.8211e-01, PNorm = 49.8319, GNorm = 0.9456, lr_0 = 7.0596e-04
Loss = 4.3036e-01, PNorm = 49.8475, GNorm = 1.3999, lr_0 = 7.0548e-04
Loss = 4.1329e-01, PNorm = 49.8638, GNorm = 1.1514, lr_0 = 7.0499e-04
Loss = 4.5320e-01, PNorm = 49.8791, GNorm = 1.3550, lr_0 = 7.0451e-04
Loss = 5.3256e-01, PNorm = 49.8895, GNorm = 1.0998, lr_0 = 7.0403e-04
Loss = 4.4378e-01, PNorm = 49.9092, GNorm = 1.2456, lr_0 = 7.0354e-04
Loss = 4.2508e-01, PNorm = 49.9282, GNorm = 1.3972, lr_0 = 7.0306e-04
Loss = 4.9128e-01, PNorm = 49.9516, GNorm = 1.3219, lr_0 = 7.0258e-04
Loss = 4.4227e-01, PNorm = 49.9764, GNorm = 1.9773, lr_0 = 7.0210e-04
Loss = 4.1626e-01, PNorm = 49.9988, GNorm = 1.5781, lr_0 = 7.0162e-04
Loss = 5.8388e-01, PNorm = 50.0200, GNorm = 1.3199, lr_0 = 7.0114e-04
Loss = 4.9840e-01, PNorm = 50.0454, GNorm = 2.6676, lr_0 = 7.0066e-04
Loss = 4.7863e-01, PNorm = 50.0597, GNorm = 1.6874, lr_0 = 7.0018e-04
Loss = 5.3509e-01, PNorm = 50.0791, GNorm = 1.2118, lr_0 = 6.9970e-04
Loss = 4.2038e-01, PNorm = 50.0909, GNorm = 1.5029, lr_0 = 6.9922e-04
Loss = 4.7805e-01, PNorm = 50.1091, GNorm = 3.0536, lr_0 = 6.9874e-04
Loss = 5.1643e-01, PNorm = 50.1267, GNorm = 1.4480, lr_0 = 6.9826e-04
Loss = 5.2589e-01, PNorm = 50.1429, GNorm = 1.5098, lr_0 = 6.9778e-04
Loss = 4.1393e-01, PNorm = 50.1588, GNorm = 1.8642, lr_0 = 6.9730e-04
Loss = 4.4052e-01, PNorm = 50.1811, GNorm = 2.0046, lr_0 = 6.9683e-04
Loss = 4.7519e-01, PNorm = 50.1947, GNorm = 1.6439, lr_0 = 6.9635e-04
Loss = 4.1806e-01, PNorm = 50.2135, GNorm = 1.1415, lr_0 = 6.9587e-04
Loss = 4.5030e-01, PNorm = 50.2302, GNorm = 1.9223, lr_0 = 6.9540e-04
Loss = 4.0893e-01, PNorm = 50.2474, GNorm = 1.7878, lr_0 = 6.9492e-04
Loss = 4.0336e-01, PNorm = 50.2637, GNorm = 1.4766, lr_0 = 6.9444e-04
Loss = 4.2790e-01, PNorm = 50.2784, GNorm = 1.3416, lr_0 = 6.9397e-04
Loss = 5.4658e-01, PNorm = 50.2940, GNorm = 2.6174, lr_0 = 6.9349e-04
Loss = 5.1803e-01, PNorm = 50.3170, GNorm = 1.5404, lr_0 = 6.9302e-04
Loss = 4.8549e-01, PNorm = 50.3429, GNorm = 1.1712, lr_0 = 6.9254e-04
Loss = 4.1438e-01, PNorm = 50.3625, GNorm = 1.1793, lr_0 = 6.9207e-04
Loss = 4.4002e-01, PNorm = 50.3792, GNorm = 1.3328, lr_0 = 6.9159e-04
Loss = 3.9258e-01, PNorm = 50.3952, GNorm = 1.5248, lr_0 = 6.9112e-04
Loss = 4.3767e-01, PNorm = 50.4097, GNorm = 0.9254, lr_0 = 6.9065e-04
Loss = 4.8792e-01, PNorm = 50.4184, GNorm = 1.6164, lr_0 = 6.9017e-04
Loss = 5.1576e-01, PNorm = 50.4317, GNorm = 1.9030, lr_0 = 6.8970e-04
Loss = 4.4793e-01, PNorm = 50.4445, GNorm = 1.4731, lr_0 = 6.8923e-04
Loss = 4.5611e-01, PNorm = 50.4547, GNorm = 1.2655, lr_0 = 6.8876e-04
Loss = 4.2205e-01, PNorm = 50.4673, GNorm = 0.8764, lr_0 = 6.8828e-04
Loss = 4.3694e-01, PNorm = 50.4827, GNorm = 1.0684, lr_0 = 6.8781e-04
Loss = 4.1234e-01, PNorm = 50.4982, GNorm = 1.6952, lr_0 = 6.8734e-04
Loss = 4.0681e-01, PNorm = 50.5176, GNorm = 1.0533, lr_0 = 6.8687e-04
Loss = 4.9454e-01, PNorm = 50.5340, GNorm = 1.0512, lr_0 = 6.8640e-04
Loss = 4.3069e-01, PNorm = 50.5415, GNorm = 2.2685, lr_0 = 6.8593e-04
Loss = 4.6442e-01, PNorm = 50.5495, GNorm = 1.7177, lr_0 = 6.8546e-04
Loss = 4.1514e-01, PNorm = 50.5633, GNorm = 1.0147, lr_0 = 6.8499e-04
Loss = 4.6865e-01, PNorm = 50.5769, GNorm = 2.0662, lr_0 = 6.8452e-04
Loss = 4.4680e-01, PNorm = 50.5857, GNorm = 1.4414, lr_0 = 6.8405e-04
Loss = 4.7652e-01, PNorm = 50.5989, GNorm = 1.2293, lr_0 = 6.8358e-04
Loss = 4.8792e-01, PNorm = 50.6112, GNorm = 1.5630, lr_0 = 6.8312e-04
Loss = 4.4311e-01, PNorm = 50.6273, GNorm = 2.9121, lr_0 = 6.8265e-04
Loss = 5.0556e-01, PNorm = 50.6454, GNorm = 1.2326, lr_0 = 6.8218e-04
Loss = 4.1940e-01, PNorm = 50.6609, GNorm = 1.6977, lr_0 = 6.8171e-04
Loss = 3.8549e-01, PNorm = 50.6736, GNorm = 1.0087, lr_0 = 6.8125e-04
Loss = 4.5182e-01, PNorm = 50.6821, GNorm = 1.1292, lr_0 = 6.8078e-04
Loss = 4.7771e-01, PNorm = 50.6978, GNorm = 1.4388, lr_0 = 6.8031e-04
Loss = 4.2489e-01, PNorm = 50.7086, GNorm = 1.5245, lr_0 = 6.7985e-04
Loss = 4.4046e-01, PNorm = 50.7237, GNorm = 0.9063, lr_0 = 6.7938e-04
Loss = 4.5388e-01, PNorm = 50.7374, GNorm = 1.2753, lr_0 = 6.7892e-04
Loss = 4.0847e-01, PNorm = 50.7536, GNorm = 1.4112, lr_0 = 6.7845e-04
Loss = 4.4451e-01, PNorm = 50.7614, GNorm = 1.9691, lr_0 = 6.7799e-04
Loss = 5.0678e-01, PNorm = 50.7762, GNorm = 1.8006, lr_0 = 6.7752e-04
Loss = 4.3819e-01, PNorm = 50.7882, GNorm = 1.2369, lr_0 = 6.7706e-04
Loss = 4.2749e-01, PNorm = 50.8071, GNorm = 1.2281, lr_0 = 6.7659e-04
Loss = 4.0970e-01, PNorm = 50.8208, GNorm = 1.1484, lr_0 = 6.7613e-04
Loss = 5.0263e-01, PNorm = 50.8329, GNorm = 1.1229, lr_0 = 6.7567e-04
Loss = 4.0415e-01, PNorm = 50.8474, GNorm = 1.7478, lr_0 = 6.7520e-04
Loss = 4.4611e-01, PNorm = 50.8565, GNorm = 1.1753, lr_0 = 6.7474e-04
Loss = 4.2674e-01, PNorm = 50.8667, GNorm = 1.1711, lr_0 = 6.7428e-04
Loss = 4.1347e-01, PNorm = 50.8769, GNorm = 1.2585, lr_0 = 6.7382e-04
Loss = 4.2398e-01, PNorm = 50.8889, GNorm = 1.4818, lr_0 = 6.7335e-04
Loss = 4.6604e-01, PNorm = 50.9017, GNorm = 1.5141, lr_0 = 6.7289e-04
Loss = 4.3472e-01, PNorm = 50.9198, GNorm = 1.0573, lr_0 = 6.7243e-04
Loss = 4.4370e-01, PNorm = 50.9353, GNorm = 2.3755, lr_0 = 6.7197e-04
Loss = 4.5865e-01, PNorm = 50.9537, GNorm = 3.0359, lr_0 = 6.7151e-04
Loss = 5.2490e-01, PNorm = 50.9668, GNorm = 1.1741, lr_0 = 6.7105e-04
Loss = 5.4041e-01, PNorm = 50.9883, GNorm = 1.7174, lr_0 = 6.7059e-04
Loss = 4.7640e-01, PNorm = 51.0088, GNorm = 1.0367, lr_0 = 6.7013e-04
Loss = 5.0747e-01, PNorm = 51.0221, GNorm = 1.2606, lr_0 = 6.6967e-04
Loss = 4.5036e-01, PNorm = 51.0404, GNorm = 2.0612, lr_0 = 6.6921e-04
Loss = 5.1776e-01, PNorm = 51.0603, GNorm = 2.0375, lr_0 = 6.6876e-04
Loss = 4.5014e-01, PNorm = 51.0760, GNorm = 1.2006, lr_0 = 6.6830e-04
Loss = 4.0733e-01, PNorm = 51.0868, GNorm = 0.9052, lr_0 = 6.6784e-04
Loss = 4.3800e-01, PNorm = 51.0903, GNorm = 1.6337, lr_0 = 6.6738e-04
Loss = 4.7257e-01, PNorm = 51.0993, GNorm = 0.9320, lr_0 = 6.6693e-04
Loss = 4.6073e-01, PNorm = 51.1144, GNorm = 1.0782, lr_0 = 6.6647e-04
Loss = 4.5809e-01, PNorm = 51.1356, GNorm = 1.3254, lr_0 = 6.6601e-04
Loss = 4.9982e-01, PNorm = 51.1487, GNorm = 2.2876, lr_0 = 6.6556e-04
Loss = 4.6204e-01, PNorm = 51.1632, GNorm = 1.3304, lr_0 = 6.6510e-04
Loss = 4.3753e-01, PNorm = 51.1812, GNorm = 1.4051, lr_0 = 6.6464e-04
Loss = 4.0971e-01, PNorm = 51.1879, GNorm = 1.2108, lr_0 = 6.6419e-04
Loss = 4.8920e-01, PNorm = 51.2052, GNorm = 1.9585, lr_0 = 6.6373e-04
Loss = 4.9605e-01, PNorm = 51.2189, GNorm = 2.1414, lr_0 = 6.6328e-04
Loss = 4.2418e-01, PNorm = 51.2333, GNorm = 1.1682, lr_0 = 6.6282e-04
Validation mae = 0.119038
Epoch 7
Loss = 4.6106e-01, PNorm = 51.2527, GNorm = 1.6601, lr_0 = 6.6237e-04
Loss = 4.6221e-01, PNorm = 51.2724, GNorm = 1.0152, lr_0 = 6.6192e-04
Loss = 4.2910e-01, PNorm = 51.2848, GNorm = 1.3142, lr_0 = 6.6146e-04
Loss = 4.0908e-01, PNorm = 51.2979, GNorm = 1.5470, lr_0 = 6.6101e-04
Loss = 4.5523e-01, PNorm = 51.3088, GNorm = 1.7593, lr_0 = 6.6056e-04
Loss = 4.0579e-01, PNorm = 51.3277, GNorm = 1.2616, lr_0 = 6.6011e-04
Loss = 4.2070e-01, PNorm = 51.3422, GNorm = 1.0089, lr_0 = 6.5965e-04
Loss = 3.9534e-01, PNorm = 51.3622, GNorm = 1.1762, lr_0 = 6.5920e-04
Loss = 4.3973e-01, PNorm = 51.3789, GNorm = 0.9761, lr_0 = 6.5875e-04
Loss = 3.9848e-01, PNorm = 51.3938, GNorm = 1.9758, lr_0 = 6.5830e-04
Loss = 4.4597e-01, PNorm = 51.4041, GNorm = 1.3308, lr_0 = 6.5785e-04
Loss = 4.3947e-01, PNorm = 51.4129, GNorm = 1.0170, lr_0 = 6.5740e-04
Loss = 4.6278e-01, PNorm = 51.4323, GNorm = 1.3910, lr_0 = 6.5695e-04
Loss = 3.8181e-01, PNorm = 51.4514, GNorm = 1.3633, lr_0 = 6.5650e-04
Loss = 4.1941e-01, PNorm = 51.4667, GNorm = 1.6574, lr_0 = 6.5605e-04
Loss = 4.8955e-01, PNorm = 51.4839, GNorm = 1.2498, lr_0 = 6.5560e-04
Loss = 4.2201e-01, PNorm = 51.4933, GNorm = 0.8338, lr_0 = 6.5515e-04
Loss = 4.2343e-01, PNorm = 51.5080, GNorm = 1.8360, lr_0 = 6.5470e-04
Loss = 4.3714e-01, PNorm = 51.5186, GNorm = 1.4061, lr_0 = 6.5425e-04
Loss = 3.9094e-01, PNorm = 51.5328, GNorm = 1.3360, lr_0 = 6.5380e-04
Loss = 4.4941e-01, PNorm = 51.5417, GNorm = 1.0493, lr_0 = 6.5335e-04
Loss = 4.3771e-01, PNorm = 51.5574, GNorm = 0.8430, lr_0 = 6.5291e-04
Loss = 3.7097e-01, PNorm = 51.5781, GNorm = 1.2129, lr_0 = 6.5246e-04
Loss = 4.2752e-01, PNorm = 51.5801, GNorm = 1.3600, lr_0 = 6.5201e-04
Loss = 4.5191e-01, PNorm = 51.5904, GNorm = 2.8727, lr_0 = 6.5157e-04
Loss = 4.3655e-01, PNorm = 51.6012, GNorm = 1.5467, lr_0 = 6.5112e-04
Loss = 4.4935e-01, PNorm = 51.6140, GNorm = 1.6291, lr_0 = 6.5067e-04
Loss = 4.5532e-01, PNorm = 51.6275, GNorm = 1.5523, lr_0 = 6.5023e-04
Loss = 4.7428e-01, PNorm = 51.6447, GNorm = 2.3627, lr_0 = 6.4978e-04
Loss = 4.3506e-01, PNorm = 51.6587, GNorm = 2.1519, lr_0 = 6.4934e-04
Loss = 4.4612e-01, PNorm = 51.6739, GNorm = 1.0358, lr_0 = 6.4889e-04
Loss = 4.3134e-01, PNorm = 51.6925, GNorm = 1.3509, lr_0 = 6.4845e-04
Loss = 4.7422e-01, PNorm = 51.7081, GNorm = 2.1123, lr_0 = 6.4800e-04
Loss = 4.7420e-01, PNorm = 51.7302, GNorm = 1.1133, lr_0 = 6.4756e-04
Loss = 4.5709e-01, PNorm = 51.7447, GNorm = 1.0716, lr_0 = 6.4712e-04
Loss = 5.0159e-01, PNorm = 51.7542, GNorm = 1.2122, lr_0 = 6.4667e-04
Loss = 4.0056e-01, PNorm = 51.7651, GNorm = 1.9378, lr_0 = 6.4623e-04
Loss = 4.5194e-01, PNorm = 51.7794, GNorm = 0.8829, lr_0 = 6.4579e-04
Loss = 4.5487e-01, PNorm = 51.7933, GNorm = 2.1476, lr_0 = 6.4534e-04
Loss = 3.8909e-01, PNorm = 51.8092, GNorm = 1.2070, lr_0 = 6.4490e-04
Loss = 4.9363e-01, PNorm = 51.8264, GNorm = 1.3446, lr_0 = 6.4446e-04
Loss = 4.8908e-01, PNorm = 51.8443, GNorm = 2.5313, lr_0 = 6.4402e-04
Loss = 4.4753e-01, PNorm = 51.8629, GNorm = 0.8497, lr_0 = 6.4358e-04
Loss = 4.5085e-01, PNorm = 51.8791, GNorm = 1.0187, lr_0 = 6.4314e-04
Loss = 4.6492e-01, PNorm = 51.8979, GNorm = 2.2421, lr_0 = 6.4270e-04
Loss = 3.6667e-01, PNorm = 51.9100, GNorm = 0.8623, lr_0 = 6.4226e-04
Loss = 4.8271e-01, PNorm = 51.9260, GNorm = 1.4093, lr_0 = 6.4182e-04
Loss = 4.6643e-01, PNorm = 51.9445, GNorm = 1.2916, lr_0 = 6.4138e-04
Loss = 3.7954e-01, PNorm = 51.9592, GNorm = 1.8387, lr_0 = 6.4094e-04
Loss = 4.1763e-01, PNorm = 51.9669, GNorm = 1.1673, lr_0 = 6.4050e-04
Loss = 4.7580e-01, PNorm = 51.9725, GNorm = 1.1943, lr_0 = 6.4006e-04
Loss = 4.2561e-01, PNorm = 51.9874, GNorm = 1.3353, lr_0 = 6.3962e-04
Loss = 4.1014e-01, PNorm = 51.9993, GNorm = 1.6719, lr_0 = 6.3918e-04
Loss = 4.7946e-01, PNorm = 52.0096, GNorm = 1.3003, lr_0 = 6.3874e-04
Loss = 4.3914e-01, PNorm = 52.0211, GNorm = 1.6185, lr_0 = 6.3831e-04
Loss = 4.8214e-01, PNorm = 52.0341, GNorm = 1.9430, lr_0 = 6.3787e-04
Loss = 4.5731e-01, PNorm = 52.0502, GNorm = 1.3229, lr_0 = 6.3743e-04
Loss = 4.4429e-01, PNorm = 52.0609, GNorm = 1.3885, lr_0 = 6.3700e-04
Loss = 5.0111e-01, PNorm = 52.0770, GNorm = 1.4760, lr_0 = 6.3656e-04
Loss = 4.8906e-01, PNorm = 52.0879, GNorm = 1.7324, lr_0 = 6.3612e-04
Loss = 4.9098e-01, PNorm = 52.1028, GNorm = 1.3034, lr_0 = 6.3569e-04
Loss = 4.6338e-01, PNorm = 52.1093, GNorm = 3.1401, lr_0 = 6.3525e-04
Loss = 4.5971e-01, PNorm = 52.1186, GNorm = 1.1310, lr_0 = 6.3482e-04
Loss = 4.2110e-01, PNorm = 52.1428, GNorm = 1.3065, lr_0 = 6.3438e-04
Loss = 4.3509e-01, PNorm = 52.1702, GNorm = 1.8708, lr_0 = 6.3395e-04
Loss = 4.4765e-01, PNorm = 52.1848, GNorm = 2.0387, lr_0 = 6.3351e-04
Loss = 4.8598e-01, PNorm = 52.1978, GNorm = 1.1580, lr_0 = 6.3308e-04
Loss = 4.8602e-01, PNorm = 52.2104, GNorm = 1.5048, lr_0 = 6.3265e-04
Loss = 4.8323e-01, PNorm = 52.2177, GNorm = 1.1523, lr_0 = 6.3221e-04
Loss = 4.2282e-01, PNorm = 52.2315, GNorm = 1.0578, lr_0 = 6.3178e-04
Loss = 4.0170e-01, PNorm = 52.2375, GNorm = 1.4973, lr_0 = 6.3135e-04
Loss = 4.2400e-01, PNorm = 52.2493, GNorm = 1.3798, lr_0 = 6.3091e-04
Loss = 4.3065e-01, PNorm = 52.2630, GNorm = 1.1857, lr_0 = 6.3048e-04
Loss = 4.3056e-01, PNorm = 52.2766, GNorm = 1.5072, lr_0 = 6.3005e-04
Loss = 4.2652e-01, PNorm = 52.2835, GNorm = 1.1107, lr_0 = 6.2962e-04
Loss = 4.6986e-01, PNorm = 52.2864, GNorm = 1.2228, lr_0 = 6.2919e-04
Loss = 4.2156e-01, PNorm = 52.2945, GNorm = 1.5364, lr_0 = 6.2876e-04
Loss = 5.3202e-01, PNorm = 52.3114, GNorm = 2.3538, lr_0 = 6.2833e-04
Loss = 4.2288e-01, PNorm = 52.3295, GNorm = 1.2726, lr_0 = 6.2789e-04
Loss = 4.2151e-01, PNorm = 52.3467, GNorm = 0.9771, lr_0 = 6.2746e-04
Loss = 4.4431e-01, PNorm = 52.3590, GNorm = 1.1038, lr_0 = 6.2703e-04
Loss = 3.9962e-01, PNorm = 52.3689, GNorm = 1.7652, lr_0 = 6.2661e-04
Loss = 4.2667e-01, PNorm = 52.3825, GNorm = 1.2778, lr_0 = 6.2618e-04
Loss = 4.5169e-01, PNorm = 52.3992, GNorm = 1.9616, lr_0 = 6.2575e-04
Loss = 4.4521e-01, PNorm = 52.4021, GNorm = 1.1018, lr_0 = 6.2532e-04
Loss = 4.7485e-01, PNorm = 52.4148, GNorm = 0.8950, lr_0 = 6.2489e-04
Loss = 4.9357e-01, PNorm = 52.4308, GNorm = 2.6744, lr_0 = 6.2446e-04
Loss = 4.0011e-01, PNorm = 52.4442, GNorm = 1.2440, lr_0 = 6.2403e-04
Loss = 5.1371e-01, PNorm = 52.4605, GNorm = 1.3450, lr_0 = 6.2361e-04
Loss = 4.2550e-01, PNorm = 52.4708, GNorm = 1.4419, lr_0 = 6.2318e-04
Loss = 4.0011e-01, PNorm = 52.4822, GNorm = 1.6742, lr_0 = 6.2275e-04
Loss = 4.5492e-01, PNorm = 52.4948, GNorm = 1.0924, lr_0 = 6.2233e-04
Loss = 4.6093e-01, PNorm = 52.5070, GNorm = 1.3054, lr_0 = 6.2190e-04
Loss = 4.9774e-01, PNorm = 52.5147, GNorm = 1.3061, lr_0 = 6.2147e-04
Loss = 4.8765e-01, PNorm = 52.5248, GNorm = 1.7488, lr_0 = 6.2105e-04
Loss = 4.3322e-01, PNorm = 52.5392, GNorm = 0.9568, lr_0 = 6.2062e-04
Loss = 4.9086e-01, PNorm = 52.5579, GNorm = 1.0530, lr_0 = 6.2020e-04
Loss = 4.8371e-01, PNorm = 52.5749, GNorm = 1.9472, lr_0 = 6.1977e-04
Loss = 4.5824e-01, PNorm = 52.5875, GNorm = 1.1814, lr_0 = 6.1935e-04
Loss = 4.5241e-01, PNorm = 52.5997, GNorm = 2.0093, lr_0 = 6.1892e-04
Loss = 4.7411e-01, PNorm = 52.6122, GNorm = 1.2955, lr_0 = 6.1850e-04
Loss = 4.4713e-01, PNorm = 52.6266, GNorm = 1.2977, lr_0 = 6.1808e-04
Loss = 4.2508e-01, PNorm = 52.6340, GNorm = 1.0523, lr_0 = 6.1765e-04
Loss = 4.1858e-01, PNorm = 52.6384, GNorm = 1.0758, lr_0 = 6.1723e-04
Loss = 5.2678e-01, PNorm = 52.6510, GNorm = 1.1126, lr_0 = 6.1681e-04
Loss = 4.2385e-01, PNorm = 52.6629, GNorm = 1.2363, lr_0 = 6.1638e-04
Loss = 4.5858e-01, PNorm = 52.6737, GNorm = 1.3618, lr_0 = 6.1596e-04
Loss = 4.3919e-01, PNorm = 52.6898, GNorm = 1.4426, lr_0 = 6.1554e-04
Loss = 4.8311e-01, PNorm = 52.7080, GNorm = 1.0313, lr_0 = 6.1512e-04
Loss = 4.8672e-01, PNorm = 52.7241, GNorm = 1.5450, lr_0 = 6.1470e-04
Loss = 4.0159e-01, PNorm = 52.7359, GNorm = 1.3216, lr_0 = 6.1428e-04
Loss = 4.8118e-01, PNorm = 52.7464, GNorm = 1.5886, lr_0 = 6.1385e-04
Loss = 4.4170e-01, PNorm = 52.7499, GNorm = 1.0556, lr_0 = 6.1343e-04
Loss = 4.8265e-01, PNorm = 52.7617, GNorm = 2.9604, lr_0 = 6.1301e-04
Loss = 4.5087e-01, PNorm = 52.7789, GNorm = 1.0665, lr_0 = 6.1259e-04
Loss = 4.1916e-01, PNorm = 52.7937, GNorm = 1.4700, lr_0 = 6.1217e-04
Loss = 4.1938e-01, PNorm = 52.8006, GNorm = 2.0093, lr_0 = 6.1175e-04
Loss = 4.1489e-01, PNorm = 52.8112, GNorm = 0.9908, lr_0 = 6.1134e-04
Loss = 4.0998e-01, PNorm = 52.8221, GNorm = 1.4207, lr_0 = 6.1092e-04
Loss = 4.8913e-01, PNorm = 52.8333, GNorm = 1.2676, lr_0 = 6.1050e-04
Validation mae = 0.117827
Epoch 8
Loss = 4.2002e-01, PNorm = 52.8468, GNorm = 1.2846, lr_0 = 6.1008e-04
Loss = 4.1521e-01, PNorm = 52.8570, GNorm = 1.2963, lr_0 = 6.0966e-04
Loss = 4.1234e-01, PNorm = 52.8650, GNorm = 1.1502, lr_0 = 6.0924e-04
Loss = 4.4296e-01, PNorm = 52.8757, GNorm = 1.5386, lr_0 = 6.0883e-04
Loss = 4.2198e-01, PNorm = 52.8949, GNorm = 1.1909, lr_0 = 6.0841e-04
Loss = 5.0453e-01, PNorm = 52.9079, GNorm = 1.6297, lr_0 = 6.0799e-04
Loss = 4.3605e-01, PNorm = 52.9364, GNorm = 1.9147, lr_0 = 6.0758e-04
Loss = 4.2825e-01, PNorm = 52.9489, GNorm = 1.6835, lr_0 = 6.0716e-04
Loss = 4.5484e-01, PNorm = 52.9647, GNorm = 0.9286, lr_0 = 6.0674e-04
Loss = 4.6015e-01, PNorm = 52.9731, GNorm = 2.3441, lr_0 = 6.0633e-04
Loss = 4.6308e-01, PNorm = 52.9842, GNorm = 2.1462, lr_0 = 6.0591e-04
Loss = 4.0462e-01, PNorm = 53.0004, GNorm = 1.6491, lr_0 = 6.0550e-04
Loss = 4.0482e-01, PNorm = 53.0125, GNorm = 0.6943, lr_0 = 6.0508e-04
Loss = 4.1543e-01, PNorm = 53.0231, GNorm = 1.3719, lr_0 = 6.0467e-04
Loss = 3.9818e-01, PNorm = 53.0317, GNorm = 1.6670, lr_0 = 6.0425e-04
Loss = 4.2636e-01, PNorm = 53.0419, GNorm = 1.3210, lr_0 = 6.0384e-04
Loss = 4.9634e-01, PNorm = 53.0463, GNorm = 2.5588, lr_0 = 6.0343e-04
Loss = 3.9923e-01, PNorm = 53.0632, GNorm = 0.8210, lr_0 = 6.0301e-04
Loss = 4.4092e-01, PNorm = 53.0778, GNorm = 1.0512, lr_0 = 6.0260e-04
Loss = 4.0237e-01, PNorm = 53.0930, GNorm = 0.9209, lr_0 = 6.0219e-04
Loss = 4.3619e-01, PNorm = 53.1066, GNorm = 1.5572, lr_0 = 6.0178e-04
Loss = 4.0695e-01, PNorm = 53.1207, GNorm = 1.5041, lr_0 = 6.0136e-04
Loss = 4.9036e-01, PNorm = 53.1346, GNorm = 1.1209, lr_0 = 6.0095e-04
Loss = 4.2316e-01, PNorm = 53.1472, GNorm = 1.7417, lr_0 = 6.0054e-04
Loss = 4.8609e-01, PNorm = 53.1599, GNorm = 1.2324, lr_0 = 6.0013e-04
Loss = 3.9993e-01, PNorm = 53.1738, GNorm = 1.3342, lr_0 = 5.9972e-04
Loss = 4.0135e-01, PNorm = 53.1870, GNorm = 1.0296, lr_0 = 5.9931e-04
Loss = 4.0231e-01, PNorm = 53.2017, GNorm = 0.9780, lr_0 = 5.9890e-04
Loss = 4.5095e-01, PNorm = 53.2189, GNorm = 0.7701, lr_0 = 5.9849e-04
Loss = 4.5239e-01, PNorm = 53.2305, GNorm = 1.5501, lr_0 = 5.9808e-04
Loss = 4.4355e-01, PNorm = 53.2421, GNorm = 1.5740, lr_0 = 5.9767e-04
Loss = 4.0487e-01, PNorm = 53.2467, GNorm = 1.5069, lr_0 = 5.9726e-04
Loss = 3.6085e-01, PNorm = 53.2542, GNorm = 0.7714, lr_0 = 5.9685e-04
Loss = 4.8332e-01, PNorm = 53.2664, GNorm = 0.9230, lr_0 = 5.9644e-04
Loss = 4.4142e-01, PNorm = 53.2818, GNorm = 1.3441, lr_0 = 5.9603e-04
Loss = 4.5250e-01, PNorm = 53.2930, GNorm = 1.4858, lr_0 = 5.9562e-04
Loss = 4.6914e-01, PNorm = 53.3035, GNorm = 1.6551, lr_0 = 5.9521e-04
Loss = 4.1741e-01, PNorm = 53.3184, GNorm = 1.5778, lr_0 = 5.9481e-04
Loss = 4.2559e-01, PNorm = 53.3271, GNorm = 1.6662, lr_0 = 5.9440e-04
Loss = 4.2428e-01, PNorm = 53.3401, GNorm = 1.2563, lr_0 = 5.9399e-04
Loss = 4.2772e-01, PNorm = 53.3518, GNorm = 2.7533, lr_0 = 5.9358e-04
Loss = 4.3679e-01, PNorm = 53.3571, GNorm = 1.7254, lr_0 = 5.9318e-04
Loss = 3.9733e-01, PNorm = 53.3655, GNorm = 1.2839, lr_0 = 5.9277e-04
Loss = 4.3660e-01, PNorm = 53.3775, GNorm = 1.1398, lr_0 = 5.9236e-04
Loss = 4.2275e-01, PNorm = 53.3891, GNorm = 1.3929, lr_0 = 5.9196e-04
Loss = 4.1213e-01, PNorm = 53.3971, GNorm = 1.2596, lr_0 = 5.9155e-04
Loss = 4.3229e-01, PNorm = 53.4108, GNorm = 2.0009, lr_0 = 5.9115e-04
Loss = 4.9218e-01, PNorm = 53.4284, GNorm = 1.9704, lr_0 = 5.9074e-04
Loss = 4.4635e-01, PNorm = 53.4366, GNorm = 0.8554, lr_0 = 5.9034e-04
Loss = 5.2194e-01, PNorm = 53.4465, GNorm = 1.6658, lr_0 = 5.8993e-04
Loss = 4.0430e-01, PNorm = 53.4602, GNorm = 1.5693, lr_0 = 5.8953e-04
Loss = 4.5571e-01, PNorm = 53.4744, GNorm = 2.0854, lr_0 = 5.8913e-04
Loss = 5.7821e-01, PNorm = 53.4850, GNorm = 1.8166, lr_0 = 5.8872e-04
Loss = 4.6974e-01, PNorm = 53.5016, GNorm = 1.2037, lr_0 = 5.8832e-04
Loss = 3.7291e-01, PNorm = 53.5148, GNorm = 0.8429, lr_0 = 5.8792e-04
Loss = 4.3204e-01, PNorm = 53.5330, GNorm = 1.2347, lr_0 = 5.8751e-04
Loss = 4.1479e-01, PNorm = 53.5447, GNorm = 1.1131, lr_0 = 5.8711e-04
Loss = 4.8064e-01, PNorm = 53.5500, GNorm = 1.1477, lr_0 = 5.8671e-04
Loss = 4.3965e-01, PNorm = 53.5573, GNorm = 1.2118, lr_0 = 5.8631e-04
Loss = 3.9471e-01, PNorm = 53.5666, GNorm = 1.6081, lr_0 = 5.8591e-04
Loss = 4.3717e-01, PNorm = 53.5753, GNorm = 2.0829, lr_0 = 5.8550e-04
Loss = 4.3688e-01, PNorm = 53.5821, GNorm = 1.1202, lr_0 = 5.8510e-04
Loss = 4.1071e-01, PNorm = 53.5957, GNorm = 1.8511, lr_0 = 5.8470e-04
Loss = 4.6088e-01, PNorm = 53.6144, GNorm = 1.0949, lr_0 = 5.8430e-04
Loss = 4.0907e-01, PNorm = 53.6301, GNorm = 0.8524, lr_0 = 5.8390e-04
Loss = 4.0352e-01, PNorm = 53.6427, GNorm = 1.1150, lr_0 = 5.8350e-04
Loss = 4.0001e-01, PNorm = 53.6548, GNorm = 1.1508, lr_0 = 5.8310e-04
Loss = 4.2622e-01, PNorm = 53.6633, GNorm = 1.7223, lr_0 = 5.8270e-04
Loss = 3.8928e-01, PNorm = 53.6670, GNorm = 1.3131, lr_0 = 5.8230e-04
Loss = 4.6389e-01, PNorm = 53.6704, GNorm = 1.2927, lr_0 = 5.8190e-04
Loss = 4.2808e-01, PNorm = 53.6822, GNorm = 2.1564, lr_0 = 5.8151e-04
Loss = 3.8079e-01, PNorm = 53.6977, GNorm = 1.4228, lr_0 = 5.8111e-04
Loss = 4.6007e-01, PNorm = 53.7142, GNorm = 1.4343, lr_0 = 5.8071e-04
Loss = 4.4163e-01, PNorm = 53.7258, GNorm = 1.2724, lr_0 = 5.8031e-04
Loss = 4.3877e-01, PNorm = 53.7382, GNorm = 1.1520, lr_0 = 5.7991e-04
Loss = 4.3945e-01, PNorm = 53.7489, GNorm = 2.1691, lr_0 = 5.7952e-04
Loss = 4.5209e-01, PNorm = 53.7625, GNorm = 1.3803, lr_0 = 5.7912e-04
Loss = 4.4822e-01, PNorm = 53.7764, GNorm = 1.8452, lr_0 = 5.7872e-04
Loss = 4.5156e-01, PNorm = 53.7823, GNorm = 1.3234, lr_0 = 5.7833e-04
Loss = 4.3836e-01, PNorm = 53.7934, GNorm = 1.2820, lr_0 = 5.7793e-04
Loss = 4.5556e-01, PNorm = 53.8015, GNorm = 1.1473, lr_0 = 5.7753e-04
Loss = 4.0412e-01, PNorm = 53.8171, GNorm = 1.3345, lr_0 = 5.7714e-04
Loss = 4.0990e-01, PNorm = 53.8322, GNorm = 1.3065, lr_0 = 5.7674e-04
Loss = 3.9759e-01, PNorm = 53.8437, GNorm = 1.0214, lr_0 = 5.7635e-04
Loss = 4.1100e-01, PNorm = 53.8542, GNorm = 2.3144, lr_0 = 5.7595e-04
Loss = 4.3965e-01, PNorm = 53.8668, GNorm = 2.3366, lr_0 = 5.7556e-04
Loss = 4.7563e-01, PNorm = 53.8749, GNorm = 1.1480, lr_0 = 5.7516e-04
Loss = 4.5384e-01, PNorm = 53.8817, GNorm = 1.2771, lr_0 = 5.7477e-04
Loss = 4.6609e-01, PNorm = 53.8934, GNorm = 1.1937, lr_0 = 5.7438e-04
Loss = 4.5238e-01, PNorm = 53.9032, GNorm = 1.2809, lr_0 = 5.7398e-04
Loss = 4.7619e-01, PNorm = 53.9151, GNorm = 1.2122, lr_0 = 5.7359e-04
Loss = 4.3993e-01, PNorm = 53.9259, GNorm = 1.5969, lr_0 = 5.7320e-04
Loss = 4.5479e-01, PNorm = 53.9415, GNorm = 1.5406, lr_0 = 5.7280e-04
Loss = 4.1399e-01, PNorm = 53.9500, GNorm = 1.0705, lr_0 = 5.7241e-04
Loss = 5.0609e-01, PNorm = 53.9649, GNorm = 1.5957, lr_0 = 5.7202e-04
Loss = 4.2841e-01, PNorm = 53.9849, GNorm = 1.3191, lr_0 = 5.7163e-04
Loss = 4.2410e-01, PNorm = 53.9989, GNorm = 1.0667, lr_0 = 5.7124e-04
Loss = 4.3755e-01, PNorm = 54.0064, GNorm = 1.2910, lr_0 = 5.7084e-04
Loss = 4.1329e-01, PNorm = 54.0131, GNorm = 1.3246, lr_0 = 5.7045e-04
Loss = 4.0915e-01, PNorm = 54.0222, GNorm = 0.7917, lr_0 = 5.7006e-04
Loss = 4.1857e-01, PNorm = 54.0322, GNorm = 1.1319, lr_0 = 5.6967e-04
Loss = 3.6613e-01, PNorm = 54.0428, GNorm = 1.2238, lr_0 = 5.6928e-04
Loss = 4.4524e-01, PNorm = 54.0447, GNorm = 1.1155, lr_0 = 5.6889e-04
Loss = 4.1070e-01, PNorm = 54.0563, GNorm = 1.3350, lr_0 = 5.6850e-04
Loss = 4.7012e-01, PNorm = 54.0642, GNorm = 1.2078, lr_0 = 5.6811e-04
Loss = 4.9033e-01, PNorm = 54.0736, GNorm = 1.3770, lr_0 = 5.6772e-04
Loss = 4.1140e-01, PNorm = 54.0898, GNorm = 0.9610, lr_0 = 5.6733e-04
Loss = 4.6571e-01, PNorm = 54.1022, GNorm = 1.7034, lr_0 = 5.6695e-04
Loss = 4.5823e-01, PNorm = 54.1200, GNorm = 4.4141, lr_0 = 5.6656e-04
Loss = 4.2541e-01, PNorm = 54.1298, GNorm = 1.1115, lr_0 = 5.6617e-04
Loss = 4.2943e-01, PNorm = 54.1426, GNorm = 2.9620, lr_0 = 5.6578e-04
Loss = 4.6517e-01, PNorm = 54.1464, GNorm = 1.4253, lr_0 = 5.6539e-04
Loss = 4.1370e-01, PNorm = 54.1546, GNorm = 0.9908, lr_0 = 5.6501e-04
Loss = 4.3773e-01, PNorm = 54.1704, GNorm = 1.3347, lr_0 = 5.6462e-04
Loss = 3.9059e-01, PNorm = 54.1841, GNorm = 1.3545, lr_0 = 5.6423e-04
Loss = 4.1599e-01, PNorm = 54.1931, GNorm = 1.1481, lr_0 = 5.6385e-04
Loss = 4.1674e-01, PNorm = 54.2005, GNorm = 3.9742, lr_0 = 5.6346e-04
Loss = 4.1862e-01, PNorm = 54.2111, GNorm = 1.2069, lr_0 = 5.6307e-04
Loss = 4.7978e-01, PNorm = 54.2209, GNorm = 2.0186, lr_0 = 5.6269e-04
Loss = 4.5269e-01, PNorm = 54.2364, GNorm = 1.2875, lr_0 = 5.6230e-04
Validation mae = 0.115494
Epoch 9
Loss = 4.5355e-01, PNorm = 54.2466, GNorm = 1.9402, lr_0 = 5.6192e-04
Loss = 4.2284e-01, PNorm = 54.2526, GNorm = 1.5682, lr_0 = 5.6153e-04
Loss = 3.9079e-01, PNorm = 54.2588, GNorm = 0.9926, lr_0 = 5.6115e-04
Loss = 4.0937e-01, PNorm = 54.2686, GNorm = 1.0754, lr_0 = 5.6076e-04
Loss = 4.5106e-01, PNorm = 54.2747, GNorm = 1.3338, lr_0 = 5.6038e-04
Loss = 3.5985e-01, PNorm = 54.2844, GNorm = 0.8290, lr_0 = 5.6000e-04
Loss = 3.9354e-01, PNorm = 54.2978, GNorm = 1.2816, lr_0 = 5.5961e-04
Loss = 3.2954e-01, PNorm = 54.3165, GNorm = 1.3054, lr_0 = 5.5923e-04
Loss = 4.4397e-01, PNorm = 54.3232, GNorm = 1.9373, lr_0 = 5.5885e-04
Loss = 4.6836e-01, PNorm = 54.3353, GNorm = 2.1200, lr_0 = 5.5846e-04
Loss = 4.1145e-01, PNorm = 54.3474, GNorm = 1.0643, lr_0 = 5.5808e-04
Loss = 4.9941e-01, PNorm = 54.3594, GNorm = 2.6631, lr_0 = 5.5770e-04
Loss = 3.9429e-01, PNorm = 54.3733, GNorm = 1.1540, lr_0 = 5.5732e-04
Loss = 4.2270e-01, PNorm = 54.3902, GNorm = 1.3350, lr_0 = 5.5693e-04
Loss = 4.5218e-01, PNorm = 54.3971, GNorm = 1.6653, lr_0 = 5.5655e-04
Loss = 4.8568e-01, PNorm = 54.4041, GNorm = 1.5265, lr_0 = 5.5617e-04
Loss = 3.7719e-01, PNorm = 54.4150, GNorm = 0.8999, lr_0 = 5.5579e-04
Loss = 4.0723e-01, PNorm = 54.4243, GNorm = 1.5766, lr_0 = 5.5541e-04
Loss = 5.0341e-01, PNorm = 54.4362, GNorm = 1.9661, lr_0 = 5.5503e-04
Loss = 4.2145e-01, PNorm = 54.4462, GNorm = 1.4604, lr_0 = 5.5465e-04
Loss = 4.2717e-01, PNorm = 54.4554, GNorm = 1.2923, lr_0 = 5.5427e-04
Loss = 4.3480e-01, PNorm = 54.4652, GNorm = 1.1643, lr_0 = 5.5389e-04
Loss = 4.2440e-01, PNorm = 54.4736, GNorm = 1.8413, lr_0 = 5.5351e-04
Loss = 4.6464e-01, PNorm = 54.4866, GNorm = 1.6419, lr_0 = 5.5313e-04
Loss = 4.1643e-01, PNorm = 54.5058, GNorm = 2.7447, lr_0 = 5.5275e-04
Loss = 4.3353e-01, PNorm = 54.5175, GNorm = 1.3350, lr_0 = 5.5237e-04
Loss = 3.6099e-01, PNorm = 54.5249, GNorm = 1.0752, lr_0 = 5.5199e-04
Loss = 4.1555e-01, PNorm = 54.5373, GNorm = 1.9496, lr_0 = 5.5162e-04
Loss = 4.8946e-01, PNorm = 54.5531, GNorm = 1.2961, lr_0 = 5.5124e-04
Loss = 4.0101e-01, PNorm = 54.5665, GNorm = 1.4350, lr_0 = 5.5086e-04
Loss = 4.2129e-01, PNorm = 54.5827, GNorm = 1.1253, lr_0 = 5.5048e-04
Loss = 4.1180e-01, PNorm = 54.5977, GNorm = 2.8653, lr_0 = 5.5011e-04
Loss = 3.9885e-01, PNorm = 54.6162, GNorm = 1.1534, lr_0 = 5.4973e-04
Loss = 4.4227e-01, PNorm = 54.6294, GNorm = 2.2272, lr_0 = 5.4935e-04
Loss = 4.4362e-01, PNorm = 54.6370, GNorm = 2.8379, lr_0 = 5.4898e-04
Loss = 4.1376e-01, PNorm = 54.6438, GNorm = 2.6842, lr_0 = 5.4860e-04
Loss = 3.9214e-01, PNorm = 54.6555, GNorm = 1.0703, lr_0 = 5.4822e-04
Loss = 4.1775e-01, PNorm = 54.6599, GNorm = 1.3588, lr_0 = 5.4785e-04
Loss = 4.3023e-01, PNorm = 54.6668, GNorm = 2.1678, lr_0 = 5.4747e-04
Loss = 4.1103e-01, PNorm = 54.6771, GNorm = 1.6304, lr_0 = 5.4710e-04
Loss = 4.7149e-01, PNorm = 54.6880, GNorm = 1.3284, lr_0 = 5.4672e-04
Loss = 3.9016e-01, PNorm = 54.6943, GNorm = 2.0202, lr_0 = 5.4635e-04
Loss = 4.0540e-01, PNorm = 54.7087, GNorm = 1.5340, lr_0 = 5.4597e-04
Loss = 5.0806e-01, PNorm = 54.7210, GNorm = 1.3835, lr_0 = 5.4560e-04
Loss = 4.5790e-01, PNorm = 54.7321, GNorm = 1.3754, lr_0 = 5.4523e-04
Loss = 4.0409e-01, PNorm = 54.7427, GNorm = 1.1754, lr_0 = 5.4485e-04
Loss = 3.8910e-01, PNorm = 54.7481, GNorm = 1.2249, lr_0 = 5.4448e-04
Loss = 4.0654e-01, PNorm = 54.7559, GNorm = 1.4678, lr_0 = 5.4411e-04
Loss = 3.5884e-01, PNorm = 54.7663, GNorm = 1.8627, lr_0 = 5.4373e-04
Loss = 4.0013e-01, PNorm = 54.7794, GNorm = 1.1937, lr_0 = 5.4336e-04
Loss = 4.3482e-01, PNorm = 54.7873, GNorm = 1.4585, lr_0 = 5.4299e-04
Loss = 4.4908e-01, PNorm = 54.7993, GNorm = 1.1957, lr_0 = 5.4262e-04
Loss = 4.2645e-01, PNorm = 54.8019, GNorm = 1.5053, lr_0 = 5.4225e-04
Loss = 4.8313e-01, PNorm = 54.8123, GNorm = 1.2379, lr_0 = 5.4187e-04
Loss = 4.0966e-01, PNorm = 54.8232, GNorm = 1.4615, lr_0 = 5.4150e-04
Loss = 4.3595e-01, PNorm = 54.8341, GNorm = 1.1873, lr_0 = 5.4113e-04
Loss = 3.9353e-01, PNorm = 54.8464, GNorm = 1.2997, lr_0 = 5.4076e-04
Loss = 4.5310e-01, PNorm = 54.8592, GNorm = 1.4544, lr_0 = 5.4039e-04
Loss = 3.8670e-01, PNorm = 54.8657, GNorm = 1.3006, lr_0 = 5.4002e-04
Loss = 3.7928e-01, PNorm = 54.8746, GNorm = 1.2100, lr_0 = 5.3965e-04
Loss = 4.2320e-01, PNorm = 54.8801, GNorm = 1.1447, lr_0 = 5.3928e-04
Loss = 3.9038e-01, PNorm = 54.8889, GNorm = 0.9597, lr_0 = 5.3891e-04
Loss = 4.0225e-01, PNorm = 54.8985, GNorm = 1.8685, lr_0 = 5.3854e-04
Loss = 4.0567e-01, PNorm = 54.9141, GNorm = 1.1582, lr_0 = 5.3817e-04
Loss = 4.3047e-01, PNorm = 54.9152, GNorm = 3.2003, lr_0 = 5.3781e-04
Loss = 4.7638e-01, PNorm = 54.9319, GNorm = 1.2363, lr_0 = 5.3744e-04
Loss = 4.4502e-01, PNorm = 54.9462, GNorm = 1.4588, lr_0 = 5.3707e-04
Loss = 3.8724e-01, PNorm = 54.9569, GNorm = 1.4553, lr_0 = 5.3670e-04
Loss = 4.3649e-01, PNorm = 54.9671, GNorm = 1.3075, lr_0 = 5.3633e-04
Loss = 4.2648e-01, PNorm = 54.9789, GNorm = 1.4587, lr_0 = 5.3597e-04
Loss = 4.0803e-01, PNorm = 54.9877, GNorm = 1.8779, lr_0 = 5.3560e-04
Loss = 3.9495e-01, PNorm = 54.9947, GNorm = 1.4153, lr_0 = 5.3523e-04
Loss = 5.1397e-01, PNorm = 55.0045, GNorm = 1.4458, lr_0 = 5.3486e-04
Loss = 4.2947e-01, PNorm = 55.0203, GNorm = 1.4144, lr_0 = 5.3450e-04
Loss = 4.0273e-01, PNorm = 55.0293, GNorm = 1.2831, lr_0 = 5.3413e-04
Loss = 4.0879e-01, PNorm = 55.0396, GNorm = 1.3226, lr_0 = 5.3377e-04
Loss = 4.9567e-01, PNorm = 55.0500, GNorm = 1.4806, lr_0 = 5.3340e-04
Loss = 4.3829e-01, PNorm = 55.0630, GNorm = 1.2141, lr_0 = 5.3304e-04
Loss = 4.0226e-01, PNorm = 55.0654, GNorm = 1.1324, lr_0 = 5.3267e-04
Loss = 4.2162e-01, PNorm = 55.0771, GNorm = 1.8827, lr_0 = 5.3231e-04
Loss = 4.1756e-01, PNorm = 55.0881, GNorm = 1.2146, lr_0 = 5.3194e-04
Loss = 3.9647e-01, PNorm = 55.0982, GNorm = 1.0603, lr_0 = 5.3158e-04
Loss = 4.3897e-01, PNorm = 55.1121, GNorm = 1.4192, lr_0 = 5.3121e-04
Loss = 4.0189e-01, PNorm = 55.1215, GNorm = 1.1814, lr_0 = 5.3085e-04
Loss = 4.9108e-01, PNorm = 55.1309, GNorm = 1.4321, lr_0 = 5.3048e-04
Loss = 4.3462e-01, PNorm = 55.1318, GNorm = 1.7828, lr_0 = 5.3012e-04
Loss = 4.5079e-01, PNorm = 55.1404, GNorm = 1.7621, lr_0 = 5.2976e-04
Loss = 4.5483e-01, PNorm = 55.1486, GNorm = 1.9436, lr_0 = 5.2939e-04
Loss = 4.4349e-01, PNorm = 55.1633, GNorm = 1.3542, lr_0 = 5.2903e-04
Loss = 4.1339e-01, PNorm = 55.1772, GNorm = 1.2077, lr_0 = 5.2867e-04
Loss = 4.2321e-01, PNorm = 55.1937, GNorm = 1.4588, lr_0 = 5.2831e-04
Loss = 3.7389e-01, PNorm = 55.2045, GNorm = 2.1896, lr_0 = 5.2795e-04
Loss = 4.0057e-01, PNorm = 55.2106, GNorm = 2.0871, lr_0 = 5.2758e-04
Loss = 4.1645e-01, PNorm = 55.2130, GNorm = 1.4223, lr_0 = 5.2722e-04
Loss = 4.1627e-01, PNorm = 55.2216, GNorm = 1.3390, lr_0 = 5.2686e-04
Loss = 4.2110e-01, PNorm = 55.2314, GNorm = 1.2471, lr_0 = 5.2650e-04
Loss = 3.9753e-01, PNorm = 55.2380, GNorm = 1.4922, lr_0 = 5.2614e-04
Loss = 3.7054e-01, PNorm = 55.2429, GNorm = 1.5099, lr_0 = 5.2578e-04
Loss = 4.1685e-01, PNorm = 55.2489, GNorm = 2.0099, lr_0 = 5.2542e-04
Loss = 4.1549e-01, PNorm = 55.2647, GNorm = 1.5435, lr_0 = 5.2506e-04
Loss = 3.8893e-01, PNorm = 55.2727, GNorm = 1.8891, lr_0 = 5.2470e-04
Loss = 4.5176e-01, PNorm = 55.2832, GNorm = 1.6810, lr_0 = 5.2434e-04
Loss = 4.8535e-01, PNorm = 55.3003, GNorm = 1.7142, lr_0 = 5.2398e-04
Loss = 4.5306e-01, PNorm = 55.3116, GNorm = 0.8988, lr_0 = 5.2362e-04
Loss = 4.2530e-01, PNorm = 55.3187, GNorm = 1.2178, lr_0 = 5.2326e-04
Loss = 4.1368e-01, PNorm = 55.3280, GNorm = 1.2751, lr_0 = 5.2290e-04
Loss = 4.6095e-01, PNorm = 55.3348, GNorm = 1.8599, lr_0 = 5.2255e-04
Loss = 4.5657e-01, PNorm = 55.3416, GNorm = 1.5357, lr_0 = 5.2219e-04
Loss = 4.1510e-01, PNorm = 55.3500, GNorm = 0.9556, lr_0 = 5.2183e-04
Loss = 4.0785e-01, PNorm = 55.3637, GNorm = 1.1684, lr_0 = 5.2147e-04
Loss = 3.8405e-01, PNorm = 55.3748, GNorm = 1.9487, lr_0 = 5.2112e-04
Loss = 3.8019e-01, PNorm = 55.3830, GNorm = 1.0466, lr_0 = 5.2076e-04
Loss = 3.9874e-01, PNorm = 55.3941, GNorm = 1.2606, lr_0 = 5.2040e-04
Loss = 4.0027e-01, PNorm = 55.4057, GNorm = 1.3083, lr_0 = 5.2005e-04
Loss = 4.6137e-01, PNorm = 55.4107, GNorm = 2.2859, lr_0 = 5.1969e-04
Loss = 3.8428e-01, PNorm = 55.4175, GNorm = 1.1985, lr_0 = 5.1933e-04
Loss = 4.0227e-01, PNorm = 55.4275, GNorm = 0.8815, lr_0 = 5.1898e-04
Loss = 4.3222e-01, PNorm = 55.4382, GNorm = 3.1970, lr_0 = 5.1862e-04
Loss = 4.2125e-01, PNorm = 55.4463, GNorm = 1.1897, lr_0 = 5.1827e-04
Loss = 4.8117e-01, PNorm = 55.4492, GNorm = 1.7409, lr_0 = 5.1791e-04
Validation mae = 0.115413
Epoch 10
Loss = 4.4685e-01, PNorm = 55.4594, GNorm = 1.0746, lr_0 = 5.1756e-04
Loss = 3.8548e-01, PNorm = 55.4697, GNorm = 0.9174, lr_0 = 5.1720e-04
Loss = 3.9474e-01, PNorm = 55.4772, GNorm = 1.4149, lr_0 = 5.1685e-04
Loss = 4.0765e-01, PNorm = 55.4891, GNorm = 0.8850, lr_0 = 5.1649e-04
Loss = 3.7511e-01, PNorm = 55.4976, GNorm = 1.4374, lr_0 = 5.1614e-04
Loss = 3.9833e-01, PNorm = 55.5151, GNorm = 0.9964, lr_0 = 5.1579e-04
Loss = 4.6044e-01, PNorm = 55.5254, GNorm = 2.4790, lr_0 = 5.1543e-04
Loss = 4.5348e-01, PNorm = 55.5367, GNorm = 2.4712, lr_0 = 5.1508e-04
Loss = 4.3621e-01, PNorm = 55.5433, GNorm = 1.1288, lr_0 = 5.1473e-04
Loss = 4.3966e-01, PNorm = 55.5591, GNorm = 1.3751, lr_0 = 5.1437e-04
Loss = 4.0940e-01, PNorm = 55.5688, GNorm = 1.0847, lr_0 = 5.1402e-04
Loss = 3.5885e-01, PNorm = 55.5749, GNorm = 1.0688, lr_0 = 5.1367e-04
Loss = 3.8306e-01, PNorm = 55.5824, GNorm = 0.9368, lr_0 = 5.1332e-04
Loss = 4.0339e-01, PNorm = 55.5845, GNorm = 1.1044, lr_0 = 5.1297e-04
Loss = 4.0730e-01, PNorm = 55.5893, GNorm = 1.2342, lr_0 = 5.1262e-04
Loss = 3.9498e-01, PNorm = 55.5917, GNorm = 0.9412, lr_0 = 5.1226e-04
Loss = 4.4601e-01, PNorm = 55.5984, GNorm = 1.5686, lr_0 = 5.1191e-04
Loss = 3.7466e-01, PNorm = 55.6070, GNorm = 1.2207, lr_0 = 5.1156e-04
Loss = 4.2427e-01, PNorm = 55.6154, GNorm = 1.7864, lr_0 = 5.1121e-04
Loss = 4.0622e-01, PNorm = 55.6278, GNorm = 1.2326, lr_0 = 5.1086e-04
Loss = 4.3355e-01, PNorm = 55.6415, GNorm = 1.2248, lr_0 = 5.1051e-04
Loss = 3.5943e-01, PNorm = 55.6554, GNorm = 1.7400, lr_0 = 5.1016e-04
Loss = 4.8355e-01, PNorm = 55.6609, GNorm = 1.4003, lr_0 = 5.0981e-04
Loss = 3.9572e-01, PNorm = 55.6727, GNorm = 1.8016, lr_0 = 5.0946e-04
Loss = 4.6698e-01, PNorm = 55.6849, GNorm = 2.3650, lr_0 = 5.0911e-04
Loss = 4.4210e-01, PNorm = 55.6895, GNorm = 0.9929, lr_0 = 5.0877e-04
Loss = 4.3225e-01, PNorm = 55.6937, GNorm = 1.3955, lr_0 = 5.0842e-04
Loss = 4.2065e-01, PNorm = 55.7048, GNorm = 0.9917, lr_0 = 5.0807e-04
Loss = 4.3589e-01, PNorm = 55.7150, GNorm = 1.6117, lr_0 = 5.0772e-04
Loss = 4.4328e-01, PNorm = 55.7277, GNorm = 1.5555, lr_0 = 5.0737e-04
Loss = 3.6714e-01, PNorm = 55.7419, GNorm = 1.0666, lr_0 = 5.0703e-04
Loss = 4.9254e-01, PNorm = 55.7406, GNorm = 1.3959, lr_0 = 5.0668e-04
Loss = 4.1763e-01, PNorm = 55.7465, GNorm = 1.5959, lr_0 = 5.0633e-04
Loss = 4.2384e-01, PNorm = 55.7557, GNorm = 0.9697, lr_0 = 5.0598e-04
Loss = 4.0497e-01, PNorm = 55.7711, GNorm = 2.2757, lr_0 = 5.0564e-04
Loss = 4.0605e-01, PNorm = 55.7858, GNorm = 2.4601, lr_0 = 5.0529e-04
Loss = 4.0824e-01, PNorm = 55.7961, GNorm = 1.6005, lr_0 = 5.0494e-04
Loss = 4.1297e-01, PNorm = 55.8056, GNorm = 1.0782, lr_0 = 5.0460e-04
Loss = 4.3594e-01, PNorm = 55.8164, GNorm = 1.9491, lr_0 = 5.0425e-04
Loss = 4.2194e-01, PNorm = 55.8244, GNorm = 1.5813, lr_0 = 5.0391e-04
Loss = 4.1533e-01, PNorm = 55.8322, GNorm = 0.9236, lr_0 = 5.0356e-04
Loss = 3.8207e-01, PNorm = 55.8423, GNorm = 1.2565, lr_0 = 5.0322e-04
Loss = 4.4099e-01, PNorm = 55.8523, GNorm = 1.2236, lr_0 = 5.0287e-04
Loss = 4.0888e-01, PNorm = 55.8606, GNorm = 1.2102, lr_0 = 5.0253e-04
Loss = 4.2292e-01, PNorm = 55.8699, GNorm = 1.7306, lr_0 = 5.0218e-04
Loss = 4.4443e-01, PNorm = 55.8760, GNorm = 1.5960, lr_0 = 5.0184e-04
Loss = 4.3293e-01, PNorm = 55.8860, GNorm = 1.7349, lr_0 = 5.0150e-04
Loss = 3.7211e-01, PNorm = 55.9022, GNorm = 1.7925, lr_0 = 5.0115e-04
Loss = 4.1194e-01, PNorm = 55.9114, GNorm = 1.5156, lr_0 = 5.0081e-04
Loss = 3.9912e-01, PNorm = 55.9208, GNorm = 1.3937, lr_0 = 5.0047e-04
Loss = 3.9691e-01, PNorm = 55.9296, GNorm = 1.5256, lr_0 = 5.0012e-04
Loss = 4.5127e-01, PNorm = 55.9385, GNorm = 2.3565, lr_0 = 4.9978e-04
Loss = 4.0790e-01, PNorm = 55.9444, GNorm = 1.7355, lr_0 = 4.9944e-04
Loss = 3.9801e-01, PNorm = 55.9474, GNorm = 0.9483, lr_0 = 4.9910e-04
Loss = 4.3492e-01, PNorm = 55.9554, GNorm = 1.4817, lr_0 = 4.9875e-04
Loss = 3.8214e-01, PNorm = 55.9612, GNorm = 1.1663, lr_0 = 4.9841e-04
Loss = 3.8539e-01, PNorm = 55.9720, GNorm = 1.4354, lr_0 = 4.9807e-04
Loss = 3.6257e-01, PNorm = 55.9773, GNorm = 2.1616, lr_0 = 4.9773e-04
Loss = 3.7240e-01, PNorm = 55.9891, GNorm = 1.1509, lr_0 = 4.9739e-04
Loss = 3.6866e-01, PNorm = 55.9983, GNorm = 0.9661, lr_0 = 4.9705e-04
Loss = 4.2516e-01, PNorm = 56.0089, GNorm = 1.2807, lr_0 = 4.9671e-04
Loss = 4.1410e-01, PNorm = 56.0177, GNorm = 0.9975, lr_0 = 4.9637e-04
Loss = 4.4585e-01, PNorm = 56.0246, GNorm = 1.7058, lr_0 = 4.9603e-04
Loss = 4.2649e-01, PNorm = 56.0351, GNorm = 1.9429, lr_0 = 4.9569e-04
Loss = 4.2950e-01, PNorm = 56.0488, GNorm = 1.0050, lr_0 = 4.9535e-04
Loss = 4.2360e-01, PNorm = 56.0583, GNorm = 2.2009, lr_0 = 4.9501e-04
Loss = 4.5583e-01, PNorm = 56.0739, GNorm = 1.4481, lr_0 = 4.9467e-04
Loss = 3.9689e-01, PNorm = 56.0857, GNorm = 1.5578, lr_0 = 4.9433e-04
Loss = 3.9358e-01, PNorm = 56.0941, GNorm = 1.5331, lr_0 = 4.9399e-04
Loss = 4.0728e-01, PNorm = 56.0959, GNorm = 1.4864, lr_0 = 4.9365e-04
Loss = 3.7520e-01, PNorm = 56.1115, GNorm = 1.0918, lr_0 = 4.9332e-04
Loss = 4.6203e-01, PNorm = 56.1169, GNorm = 1.5921, lr_0 = 4.9298e-04
Loss = 4.0458e-01, PNorm = 56.1287, GNorm = 1.3956, lr_0 = 4.9264e-04
Loss = 3.7848e-01, PNorm = 56.1342, GNorm = 1.5970, lr_0 = 4.9230e-04
Loss = 3.9231e-01, PNorm = 56.1483, GNorm = 1.5179, lr_0 = 4.9197e-04
Loss = 4.7317e-01, PNorm = 56.1602, GNorm = 2.0835, lr_0 = 4.9163e-04
Loss = 3.5024e-01, PNorm = 56.1660, GNorm = 0.8999, lr_0 = 4.9129e-04
Loss = 4.7870e-01, PNorm = 56.1712, GNorm = 1.2056, lr_0 = 4.9095e-04
Loss = 4.1207e-01, PNorm = 56.1813, GNorm = 1.7188, lr_0 = 4.9062e-04
Loss = 4.0366e-01, PNorm = 56.1873, GNorm = 1.2928, lr_0 = 4.9028e-04
Loss = 3.8798e-01, PNorm = 56.1903, GNorm = 1.5974, lr_0 = 4.8995e-04
Loss = 4.2075e-01, PNorm = 56.1981, GNorm = 1.5727, lr_0 = 4.8961e-04
Loss = 4.4203e-01, PNorm = 56.2110, GNorm = 1.2528, lr_0 = 4.8928e-04
Loss = 4.7588e-01, PNorm = 56.2192, GNorm = 1.6981, lr_0 = 4.8894e-04
Loss = 4.3442e-01, PNorm = 56.2353, GNorm = 2.0132, lr_0 = 4.8861e-04
Loss = 4.3175e-01, PNorm = 56.2491, GNorm = 1.2355, lr_0 = 4.8827e-04
Loss = 4.3935e-01, PNorm = 56.2548, GNorm = 1.4000, lr_0 = 4.8794e-04
Loss = 3.9618e-01, PNorm = 56.2612, GNorm = 1.1027, lr_0 = 4.8760e-04
Loss = 4.0969e-01, PNorm = 56.2696, GNorm = 1.4901, lr_0 = 4.8727e-04
Loss = 4.2035e-01, PNorm = 56.2745, GNorm = 1.5707, lr_0 = 4.8693e-04
Loss = 3.8613e-01, PNorm = 56.2800, GNorm = 1.3070, lr_0 = 4.8660e-04
Loss = 4.1912e-01, PNorm = 56.2859, GNorm = 1.1972, lr_0 = 4.8627e-04
Loss = 4.4148e-01, PNorm = 56.2898, GNorm = 1.6193, lr_0 = 4.8593e-04
Loss = 4.2897e-01, PNorm = 56.2986, GNorm = 1.1502, lr_0 = 4.8560e-04
Loss = 4.1630e-01, PNorm = 56.3041, GNorm = 1.3887, lr_0 = 4.8527e-04
Loss = 3.7698e-01, PNorm = 56.3130, GNorm = 1.0823, lr_0 = 4.8494e-04
Loss = 5.7794e-01, PNorm = 56.3186, GNorm = 1.4323, lr_0 = 4.8460e-04
Loss = 4.0196e-01, PNorm = 56.3270, GNorm = 1.0366, lr_0 = 4.8427e-04
Loss = 3.9328e-01, PNorm = 56.3299, GNorm = 1.1826, lr_0 = 4.8394e-04
Loss = 3.9177e-01, PNorm = 56.3412, GNorm = 1.2654, lr_0 = 4.8361e-04
Loss = 4.0897e-01, PNorm = 56.3467, GNorm = 1.3508, lr_0 = 4.8328e-04
Loss = 4.3704e-01, PNorm = 56.3585, GNorm = 1.7220, lr_0 = 4.8295e-04
Loss = 4.7971e-01, PNorm = 56.3639, GNorm = 1.2944, lr_0 = 4.8262e-04
Loss = 3.8535e-01, PNorm = 56.3741, GNorm = 1.2271, lr_0 = 4.8228e-04
Loss = 4.3227e-01, PNorm = 56.3809, GNorm = 1.6566, lr_0 = 4.8195e-04
Loss = 4.3222e-01, PNorm = 56.3919, GNorm = 1.2562, lr_0 = 4.8162e-04
Loss = 4.3375e-01, PNorm = 56.3961, GNorm = 1.8092, lr_0 = 4.8129e-04
Loss = 3.7524e-01, PNorm = 56.4019, GNorm = 1.4392, lr_0 = 4.8096e-04
Loss = 4.3778e-01, PNorm = 56.4043, GNorm = 1.8072, lr_0 = 4.8064e-04
Loss = 3.8481e-01, PNorm = 56.4135, GNorm = 1.0062, lr_0 = 4.8031e-04
Loss = 3.8478e-01, PNorm = 56.4224, GNorm = 1.3400, lr_0 = 4.7998e-04
Loss = 4.0946e-01, PNorm = 56.4334, GNorm = 1.0167, lr_0 = 4.7965e-04
Loss = 4.5168e-01, PNorm = 56.4447, GNorm = 1.2817, lr_0 = 4.7932e-04
Loss = 4.5936e-01, PNorm = 56.4540, GNorm = 1.9080, lr_0 = 4.7899e-04
Loss = 4.4330e-01, PNorm = 56.4674, GNorm = 1.6004, lr_0 = 4.7866e-04
Loss = 4.0448e-01, PNorm = 56.4743, GNorm = 1.8004, lr_0 = 4.7833e-04
Loss = 4.3506e-01, PNorm = 56.4737, GNorm = 2.0008, lr_0 = 4.7801e-04
Loss = 4.2214e-01, PNorm = 56.4717, GNorm = 1.4437, lr_0 = 4.7768e-04
Loss = 4.3291e-01, PNorm = 56.4820, GNorm = 1.3265, lr_0 = 4.7735e-04
Loss = 4.2591e-01, PNorm = 56.4901, GNorm = 1.7363, lr_0 = 4.7703e-04
Validation mae = 0.115314
Epoch 11
Loss = 4.0296e-01, PNorm = 56.5029, GNorm = 1.3134, lr_0 = 4.7670e-04
Loss = 4.4757e-01, PNorm = 56.5117, GNorm = 1.6315, lr_0 = 4.7637e-04
Loss = 4.1011e-01, PNorm = 56.5233, GNorm = 1.1044, lr_0 = 4.7605e-04
Loss = 4.2446e-01, PNorm = 56.5315, GNorm = 1.2219, lr_0 = 4.7572e-04
Loss = 4.6523e-01, PNorm = 56.5384, GNorm = 1.3712, lr_0 = 4.7539e-04
Loss = 3.9072e-01, PNorm = 56.5477, GNorm = 1.2493, lr_0 = 4.7507e-04
Loss = 3.9259e-01, PNorm = 56.5581, GNorm = 1.2825, lr_0 = 4.7474e-04
Loss = 4.0938e-01, PNorm = 56.5662, GNorm = 1.5102, lr_0 = 4.7442e-04
Loss = 3.8969e-01, PNorm = 56.5751, GNorm = 1.3384, lr_0 = 4.7409e-04
Loss = 4.6211e-01, PNorm = 56.5776, GNorm = 2.5807, lr_0 = 4.7377e-04
Loss = 3.6047e-01, PNorm = 56.5884, GNorm = 1.3030, lr_0 = 4.7344e-04
Loss = 3.8080e-01, PNorm = 56.6046, GNorm = 1.2919, lr_0 = 4.7312e-04
Loss = 4.0157e-01, PNorm = 56.6135, GNorm = 1.0146, lr_0 = 4.7279e-04
Loss = 3.7918e-01, PNorm = 56.6166, GNorm = 1.5868, lr_0 = 4.7247e-04
Loss = 3.8619e-01, PNorm = 56.6238, GNorm = 1.3988, lr_0 = 4.7215e-04
Loss = 4.0345e-01, PNorm = 56.6291, GNorm = 1.7759, lr_0 = 4.7182e-04
Loss = 4.4904e-01, PNorm = 56.6367, GNorm = 1.5605, lr_0 = 4.7150e-04
Loss = 4.4364e-01, PNorm = 56.6460, GNorm = 1.0843, lr_0 = 4.7118e-04
Loss = 4.2788e-01, PNorm = 56.6535, GNorm = 1.1924, lr_0 = 4.7085e-04
Loss = 3.9380e-01, PNorm = 56.6590, GNorm = 1.5636, lr_0 = 4.7053e-04
Loss = 3.7843e-01, PNorm = 56.6639, GNorm = 1.3395, lr_0 = 4.7021e-04
Loss = 4.1358e-01, PNorm = 56.6645, GNorm = 1.5671, lr_0 = 4.6989e-04
Loss = 3.9570e-01, PNorm = 56.6748, GNorm = 2.2706, lr_0 = 4.6957e-04
Loss = 4.5163e-01, PNorm = 56.6840, GNorm = 2.0797, lr_0 = 4.6924e-04
Loss = 4.1510e-01, PNorm = 56.6893, GNorm = 2.0165, lr_0 = 4.6892e-04
Loss = 4.0442e-01, PNorm = 56.6967, GNorm = 1.1162, lr_0 = 4.6860e-04
Loss = 4.0298e-01, PNorm = 56.7070, GNorm = 1.1282, lr_0 = 4.6828e-04
Loss = 3.9726e-01, PNorm = 56.7123, GNorm = 1.9063, lr_0 = 4.6796e-04
Loss = 4.3745e-01, PNorm = 56.7199, GNorm = 1.0464, lr_0 = 4.6764e-04
Loss = 4.0000e-01, PNorm = 56.7235, GNorm = 1.2732, lr_0 = 4.6732e-04
Loss = 4.5558e-01, PNorm = 56.7284, GNorm = 1.4393, lr_0 = 4.6700e-04
Loss = 4.0166e-01, PNorm = 56.7381, GNorm = 1.5579, lr_0 = 4.6668e-04
Loss = 3.4881e-01, PNorm = 56.7440, GNorm = 1.0771, lr_0 = 4.6636e-04
Loss = 4.4232e-01, PNorm = 56.7471, GNorm = 1.6730, lr_0 = 4.6604e-04
Loss = 4.0378e-01, PNorm = 56.7565, GNorm = 1.5769, lr_0 = 4.6572e-04
Loss = 4.0151e-01, PNorm = 56.7577, GNorm = 1.1361, lr_0 = 4.6540e-04
Loss = 4.0813e-01, PNorm = 56.7683, GNorm = 2.1431, lr_0 = 4.6508e-04
Loss = 3.9898e-01, PNorm = 56.7730, GNorm = 1.2143, lr_0 = 4.6476e-04
Loss = 4.1352e-01, PNorm = 56.7812, GNorm = 1.2705, lr_0 = 4.6445e-04
Loss = 4.2527e-01, PNorm = 56.7851, GNorm = 1.8937, lr_0 = 4.6413e-04
Loss = 4.1290e-01, PNorm = 56.7953, GNorm = 1.6112, lr_0 = 4.6381e-04
Loss = 4.7808e-01, PNorm = 56.8059, GNorm = 1.1927, lr_0 = 4.6349e-04
Loss = 3.8329e-01, PNorm = 56.8160, GNorm = 1.2006, lr_0 = 4.6317e-04
Loss = 3.6375e-01, PNorm = 56.8211, GNorm = 1.1452, lr_0 = 4.6286e-04
Loss = 3.8866e-01, PNorm = 56.8278, GNorm = 1.0472, lr_0 = 4.6254e-04
Loss = 4.3182e-01, PNorm = 56.8326, GNorm = 1.2405, lr_0 = 4.6222e-04
Loss = 3.6427e-01, PNorm = 56.8418, GNorm = 1.4238, lr_0 = 4.6191e-04
Loss = 4.3985e-01, PNorm = 56.8459, GNorm = 2.0103, lr_0 = 4.6159e-04
Loss = 3.4790e-01, PNorm = 56.8552, GNorm = 1.9555, lr_0 = 4.6127e-04
Loss = 3.7908e-01, PNorm = 56.8640, GNorm = 0.9099, lr_0 = 4.6096e-04
Loss = 3.7203e-01, PNorm = 56.8771, GNorm = 1.4236, lr_0 = 4.6064e-04
Loss = 4.4824e-01, PNorm = 56.8790, GNorm = 1.2849, lr_0 = 4.6033e-04
Loss = 3.9843e-01, PNorm = 56.8871, GNorm = 1.1667, lr_0 = 4.6001e-04
Loss = 4.2719e-01, PNorm = 56.8944, GNorm = 1.2756, lr_0 = 4.5970e-04
Loss = 4.0431e-01, PNorm = 56.9084, GNorm = 1.5126, lr_0 = 4.5938e-04
Loss = 3.9005e-01, PNorm = 56.9194, GNorm = 1.0165, lr_0 = 4.5907e-04
Loss = 3.3485e-01, PNorm = 56.9302, GNorm = 1.0393, lr_0 = 4.5875e-04
Loss = 3.5909e-01, PNorm = 56.9379, GNorm = 1.2353, lr_0 = 4.5844e-04
Loss = 4.1165e-01, PNorm = 56.9431, GNorm = 1.6961, lr_0 = 4.5812e-04
Loss = 4.1225e-01, PNorm = 56.9495, GNorm = 1.5519, lr_0 = 4.5781e-04
Loss = 3.9532e-01, PNorm = 56.9547, GNorm = 1.1762, lr_0 = 4.5750e-04
Loss = 5.1444e-01, PNorm = 56.9603, GNorm = 1.1301, lr_0 = 4.5718e-04
Loss = 4.2314e-01, PNorm = 56.9713, GNorm = 1.1746, lr_0 = 4.5687e-04
Loss = 4.2414e-01, PNorm = 56.9758, GNorm = 1.7952, lr_0 = 4.5656e-04
Loss = 4.1600e-01, PNorm = 56.9875, GNorm = 1.1672, lr_0 = 4.5624e-04
Loss = 3.7623e-01, PNorm = 56.9941, GNorm = 1.3803, lr_0 = 4.5593e-04
Loss = 4.6598e-01, PNorm = 57.0092, GNorm = 1.0708, lr_0 = 4.5562e-04
Loss = 4.0943e-01, PNorm = 57.0229, GNorm = 1.4916, lr_0 = 4.5531e-04
Loss = 4.2873e-01, PNorm = 57.0289, GNorm = 2.0401, lr_0 = 4.5499e-04
Loss = 4.3317e-01, PNorm = 57.0408, GNorm = 0.9106, lr_0 = 4.5468e-04
Loss = 3.6395e-01, PNorm = 57.0482, GNorm = 1.2729, lr_0 = 4.5437e-04
Loss = 4.4671e-01, PNorm = 57.0554, GNorm = 1.3091, lr_0 = 4.5406e-04
Loss = 4.7113e-01, PNorm = 57.0635, GNorm = 1.5597, lr_0 = 4.5375e-04
Loss = 3.8002e-01, PNorm = 57.0685, GNorm = 2.3083, lr_0 = 4.5344e-04
Loss = 4.0925e-01, PNorm = 57.0786, GNorm = 1.4618, lr_0 = 4.5313e-04
Loss = 4.2744e-01, PNorm = 57.0839, GNorm = 1.0283, lr_0 = 4.5282e-04
Loss = 4.3170e-01, PNorm = 57.0892, GNorm = 1.1615, lr_0 = 4.5251e-04
Loss = 4.0070e-01, PNorm = 57.0929, GNorm = 1.8909, lr_0 = 4.5220e-04
Loss = 4.2856e-01, PNorm = 57.0977, GNorm = 1.4622, lr_0 = 4.5189e-04
Loss = 3.8969e-01, PNorm = 57.1085, GNorm = 1.2965, lr_0 = 4.5158e-04
Loss = 3.6824e-01, PNorm = 57.1198, GNorm = 1.3960, lr_0 = 4.5127e-04
Loss = 4.0288e-01, PNorm = 57.1309, GNorm = 1.0588, lr_0 = 4.5096e-04
Loss = 3.8243e-01, PNorm = 57.1380, GNorm = 1.2108, lr_0 = 4.5065e-04
Loss = 3.7340e-01, PNorm = 57.1439, GNorm = 1.8690, lr_0 = 4.5034e-04
Loss = 4.3912e-01, PNorm = 57.1517, GNorm = 1.1448, lr_0 = 4.5003e-04
Loss = 3.9421e-01, PNorm = 57.1584, GNorm = 2.6869, lr_0 = 4.4972e-04
Loss = 4.1742e-01, PNorm = 57.1668, GNorm = 1.1113, lr_0 = 4.4942e-04
Loss = 4.1218e-01, PNorm = 57.1712, GNorm = 0.9843, lr_0 = 4.4911e-04
Loss = 4.3386e-01, PNorm = 57.1777, GNorm = 1.7868, lr_0 = 4.4880e-04
Loss = 4.3544e-01, PNorm = 57.1811, GNorm = 1.7466, lr_0 = 4.4849e-04
Loss = 4.2542e-01, PNorm = 57.1919, GNorm = 1.3680, lr_0 = 4.4819e-04
Loss = 3.9282e-01, PNorm = 57.1995, GNorm = 1.2173, lr_0 = 4.4788e-04
Loss = 3.8905e-01, PNorm = 57.2054, GNorm = 1.6213, lr_0 = 4.4757e-04
Loss = 3.5058e-01, PNorm = 57.2128, GNorm = 1.6674, lr_0 = 4.4727e-04
Loss = 4.0516e-01, PNorm = 57.2212, GNorm = 1.7834, lr_0 = 4.4696e-04
Loss = 4.2346e-01, PNorm = 57.2286, GNorm = 1.2620, lr_0 = 4.4665e-04
Loss = 4.1002e-01, PNorm = 57.2369, GNorm = 1.8682, lr_0 = 4.4635e-04
Loss = 3.6436e-01, PNorm = 57.2407, GNorm = 1.3010, lr_0 = 4.4604e-04
Loss = 4.1551e-01, PNorm = 57.2464, GNorm = 1.4746, lr_0 = 4.4574e-04
Loss = 3.3871e-01, PNorm = 57.2522, GNorm = 1.6600, lr_0 = 4.4543e-04
Loss = 4.1033e-01, PNorm = 57.2595, GNorm = 1.1857, lr_0 = 4.4513e-04
Loss = 3.9641e-01, PNorm = 57.2677, GNorm = 0.9905, lr_0 = 4.4482e-04
Loss = 3.6380e-01, PNorm = 57.2764, GNorm = 1.4635, lr_0 = 4.4452e-04
Loss = 3.8486e-01, PNorm = 57.2818, GNorm = 1.2545, lr_0 = 4.4421e-04
Loss = 3.6314e-01, PNorm = 57.2897, GNorm = 1.6039, lr_0 = 4.4391e-04
Loss = 4.1426e-01, PNorm = 57.3019, GNorm = 1.7043, lr_0 = 4.4360e-04
Loss = 4.0722e-01, PNorm = 57.3147, GNorm = 1.4205, lr_0 = 4.4330e-04
Loss = 4.2916e-01, PNorm = 57.3249, GNorm = 1.1717, lr_0 = 4.4299e-04
Loss = 3.9105e-01, PNorm = 57.3295, GNorm = 1.2552, lr_0 = 4.4269e-04
Loss = 4.2111e-01, PNorm = 57.3356, GNorm = 2.3832, lr_0 = 4.4239e-04
Loss = 4.1008e-01, PNorm = 57.3431, GNorm = 1.3132, lr_0 = 4.4209e-04
Loss = 4.1399e-01, PNorm = 57.3471, GNorm = 1.0188, lr_0 = 4.4178e-04
Loss = 3.8902e-01, PNorm = 57.3510, GNorm = 1.3489, lr_0 = 4.4148e-04
Loss = 4.0793e-01, PNorm = 57.3549, GNorm = 1.5777, lr_0 = 4.4118e-04
Loss = 4.1185e-01, PNorm = 57.3646, GNorm = 1.4108, lr_0 = 4.4088e-04
Loss = 3.9186e-01, PNorm = 57.3729, GNorm = 1.8429, lr_0 = 4.4057e-04
Loss = 3.8145e-01, PNorm = 57.3872, GNorm = 1.8015, lr_0 = 4.4027e-04
Loss = 4.6432e-01, PNorm = 57.3980, GNorm = 1.4502, lr_0 = 4.3997e-04
Loss = 3.8041e-01, PNorm = 57.4134, GNorm = 1.3571, lr_0 = 4.3967e-04
Loss = 4.0366e-01, PNorm = 57.4185, GNorm = 1.3298, lr_0 = 4.3937e-04
Validation mae = 0.113803
Epoch 12
Loss = 3.8619e-01, PNorm = 57.4241, GNorm = 1.4296, lr_0 = 4.3907e-04
Loss = 4.3634e-01, PNorm = 57.4356, GNorm = 2.5047, lr_0 = 4.3877e-04
Loss = 4.5594e-01, PNorm = 57.4411, GNorm = 0.9761, lr_0 = 4.3846e-04
Loss = 3.7715e-01, PNorm = 57.4498, GNorm = 1.4520, lr_0 = 4.3816e-04
Loss = 3.9865e-01, PNorm = 57.4563, GNorm = 1.0399, lr_0 = 4.3786e-04
Loss = 4.3005e-01, PNorm = 57.4596, GNorm = 1.6243, lr_0 = 4.3756e-04
Loss = 4.1772e-01, PNorm = 57.4638, GNorm = 1.0940, lr_0 = 4.3726e-04
Loss = 4.0662e-01, PNorm = 57.4694, GNorm = 1.1413, lr_0 = 4.3696e-04
Loss = 4.6063e-01, PNorm = 57.4765, GNorm = 1.2745, lr_0 = 4.3667e-04
Loss = 3.9356e-01, PNorm = 57.4835, GNorm = 1.7179, lr_0 = 4.3637e-04
Loss = 3.7880e-01, PNorm = 57.4907, GNorm = 2.2722, lr_0 = 4.3607e-04
Loss = 3.6847e-01, PNorm = 57.4954, GNorm = 1.2329, lr_0 = 4.3577e-04
Loss = 3.5964e-01, PNorm = 57.5080, GNorm = 1.0894, lr_0 = 4.3547e-04
Loss = 3.9338e-01, PNorm = 57.5151, GNorm = 1.5123, lr_0 = 4.3517e-04
Loss = 3.7319e-01, PNorm = 57.5259, GNorm = 1.0514, lr_0 = 4.3487e-04
Loss = 4.0472e-01, PNorm = 57.5329, GNorm = 1.1048, lr_0 = 4.3458e-04
Loss = 3.9571e-01, PNorm = 57.5446, GNorm = 1.3192, lr_0 = 4.3428e-04
Loss = 4.2483e-01, PNorm = 57.5576, GNorm = 1.4603, lr_0 = 4.3398e-04
Loss = 4.1111e-01, PNorm = 57.5674, GNorm = 1.3145, lr_0 = 4.3368e-04
Loss = 4.0975e-01, PNorm = 57.5782, GNorm = 1.8355, lr_0 = 4.3339e-04
Loss = 3.6217e-01, PNorm = 57.5861, GNorm = 1.0901, lr_0 = 4.3309e-04
Loss = 4.0071e-01, PNorm = 57.5899, GNorm = 1.3808, lr_0 = 4.3279e-04
Loss = 4.1898e-01, PNorm = 57.5948, GNorm = 1.0987, lr_0 = 4.3250e-04
Loss = 4.1870e-01, PNorm = 57.6006, GNorm = 2.2391, lr_0 = 4.3220e-04
Loss = 3.3216e-01, PNorm = 57.6045, GNorm = 1.6456, lr_0 = 4.3190e-04
Loss = 4.0059e-01, PNorm = 57.6099, GNorm = 1.7580, lr_0 = 4.3161e-04
Loss = 3.5478e-01, PNorm = 57.6223, GNorm = 1.5675, lr_0 = 4.3131e-04
Loss = 3.7303e-01, PNorm = 57.6289, GNorm = 1.5521, lr_0 = 4.3102e-04
Loss = 4.0148e-01, PNorm = 57.6350, GNorm = 1.2987, lr_0 = 4.3072e-04
Loss = 3.3158e-01, PNorm = 57.6408, GNorm = 1.1073, lr_0 = 4.3043e-04
Loss = 4.2406e-01, PNorm = 57.6495, GNorm = 1.2334, lr_0 = 4.3013e-04
Loss = 4.2826e-01, PNorm = 57.6589, GNorm = 2.4363, lr_0 = 4.2984e-04
Loss = 4.0418e-01, PNorm = 57.6693, GNorm = 0.9823, lr_0 = 4.2954e-04
Loss = 4.3302e-01, PNorm = 57.6800, GNorm = 1.1185, lr_0 = 4.2925e-04
Loss = 3.9738e-01, PNorm = 57.6902, GNorm = 1.1209, lr_0 = 4.2895e-04
Loss = 4.0276e-01, PNorm = 57.6963, GNorm = 1.2288, lr_0 = 4.2866e-04
Loss = 3.9336e-01, PNorm = 57.7038, GNorm = 1.0445, lr_0 = 4.2837e-04
Loss = 3.7202e-01, PNorm = 57.7102, GNorm = 0.9238, lr_0 = 4.2807e-04
Loss = 4.5384e-01, PNorm = 57.7170, GNorm = 1.2594, lr_0 = 4.2778e-04
Loss = 3.9145e-01, PNorm = 57.7233, GNorm = 1.2800, lr_0 = 4.2749e-04
Loss = 3.8710e-01, PNorm = 57.7325, GNorm = 1.5393, lr_0 = 4.2719e-04
Loss = 3.7368e-01, PNorm = 57.7419, GNorm = 1.1333, lr_0 = 4.2690e-04
Loss = 3.8334e-01, PNorm = 57.7443, GNorm = 1.1507, lr_0 = 4.2661e-04
Loss = 4.0322e-01, PNorm = 57.7469, GNorm = 1.2740, lr_0 = 4.2632e-04
Loss = 3.9582e-01, PNorm = 57.7555, GNorm = 1.1740, lr_0 = 4.2602e-04
Loss = 4.6087e-01, PNorm = 57.7614, GNorm = 1.4123, lr_0 = 4.2573e-04
Loss = 4.3355e-01, PNorm = 57.7726, GNorm = 1.3184, lr_0 = 4.2544e-04
Loss = 3.6982e-01, PNorm = 57.7769, GNorm = 1.3238, lr_0 = 4.2515e-04
Loss = 4.7082e-01, PNorm = 57.7882, GNorm = 1.3683, lr_0 = 4.2486e-04
Loss = 3.8239e-01, PNorm = 57.7950, GNorm = 1.2537, lr_0 = 4.2457e-04
Loss = 3.8197e-01, PNorm = 57.8029, GNorm = 1.6570, lr_0 = 4.2428e-04
Loss = 4.0407e-01, PNorm = 57.8117, GNorm = 1.1879, lr_0 = 4.2399e-04
Loss = 4.1920e-01, PNorm = 57.8257, GNorm = 1.6487, lr_0 = 4.2370e-04
Loss = 3.6085e-01, PNorm = 57.8321, GNorm = 1.8058, lr_0 = 4.2340e-04
Loss = 3.8143e-01, PNorm = 57.8420, GNorm = 1.5072, lr_0 = 4.2311e-04
Loss = 3.8598e-01, PNorm = 57.8482, GNorm = 1.5862, lr_0 = 4.2283e-04
Loss = 4.5827e-01, PNorm = 57.8535, GNorm = 1.8481, lr_0 = 4.2254e-04
Loss = 4.6518e-01, PNorm = 57.8609, GNorm = 1.2089, lr_0 = 4.2225e-04
Loss = 3.8047e-01, PNorm = 57.8717, GNorm = 1.2125, lr_0 = 4.2196e-04
Loss = 4.0946e-01, PNorm = 57.8782, GNorm = 1.2300, lr_0 = 4.2167e-04
Loss = 4.5320e-01, PNorm = 57.8859, GNorm = 1.1502, lr_0 = 4.2138e-04
Loss = 4.6128e-01, PNorm = 57.8823, GNorm = 1.3833, lr_0 = 4.2109e-04
Loss = 4.2540e-01, PNorm = 57.8942, GNorm = 1.5770, lr_0 = 4.2080e-04
Loss = 3.7283e-01, PNorm = 57.9008, GNorm = 1.0459, lr_0 = 4.2051e-04
Loss = 3.6308e-01, PNorm = 57.9070, GNorm = 0.8428, lr_0 = 4.2023e-04
Loss = 3.9876e-01, PNorm = 57.9121, GNorm = 1.4732, lr_0 = 4.1994e-04
Loss = 3.9811e-01, PNorm = 57.9213, GNorm = 2.1568, lr_0 = 4.1965e-04
Loss = 4.0799e-01, PNorm = 57.9283, GNorm = 1.9252, lr_0 = 4.1936e-04
Loss = 3.9532e-01, PNorm = 57.9347, GNorm = 1.6795, lr_0 = 4.1907e-04
Loss = 3.4632e-01, PNorm = 57.9398, GNorm = 1.2053, lr_0 = 4.1879e-04
Loss = 4.2604e-01, PNorm = 57.9460, GNorm = 1.6907, lr_0 = 4.1850e-04
Loss = 4.8610e-01, PNorm = 57.9490, GNorm = 2.3086, lr_0 = 4.1821e-04
Loss = 4.5974e-01, PNorm = 57.9570, GNorm = 2.0561, lr_0 = 4.1793e-04
Loss = 4.2838e-01, PNorm = 57.9722, GNorm = 1.1801, lr_0 = 4.1764e-04
Loss = 3.7273e-01, PNorm = 57.9795, GNorm = 1.3399, lr_0 = 4.1736e-04
Loss = 4.0433e-01, PNorm = 57.9875, GNorm = 1.7818, lr_0 = 4.1707e-04
Loss = 3.9905e-01, PNorm = 57.9908, GNorm = 1.6929, lr_0 = 4.1678e-04
Loss = 3.9990e-01, PNorm = 58.0002, GNorm = 1.7450, lr_0 = 4.1650e-04
Loss = 4.6178e-01, PNorm = 58.0017, GNorm = 1.5145, lr_0 = 4.1621e-04
Loss = 4.1242e-01, PNorm = 58.0094, GNorm = 0.9892, lr_0 = 4.1593e-04
Loss = 3.7939e-01, PNorm = 58.0176, GNorm = 1.2213, lr_0 = 4.1564e-04
Loss = 3.9975e-01, PNorm = 58.0272, GNorm = 1.3060, lr_0 = 4.1536e-04
Loss = 3.4546e-01, PNorm = 58.0282, GNorm = 1.5005, lr_0 = 4.1507e-04
Loss = 4.0794e-01, PNorm = 58.0374, GNorm = 1.5777, lr_0 = 4.1479e-04
Loss = 4.1862e-01, PNorm = 58.0477, GNorm = 1.1230, lr_0 = 4.1450e-04
Loss = 4.0381e-01, PNorm = 58.0523, GNorm = 1.2894, lr_0 = 4.1422e-04
Loss = 4.3950e-01, PNorm = 58.0617, GNorm = 1.3745, lr_0 = 4.1394e-04
Loss = 4.2843e-01, PNorm = 58.0658, GNorm = 1.2782, lr_0 = 4.1365e-04
Loss = 4.0019e-01, PNorm = 58.0718, GNorm = 1.1502, lr_0 = 4.1337e-04
Loss = 3.4709e-01, PNorm = 58.0750, GNorm = 1.4741, lr_0 = 4.1309e-04
Loss = 3.8933e-01, PNorm = 58.0814, GNorm = 1.5782, lr_0 = 4.1280e-04
Loss = 3.7592e-01, PNorm = 58.0907, GNorm = 1.2912, lr_0 = 4.1252e-04
Loss = 4.3370e-01, PNorm = 58.0953, GNorm = 1.7888, lr_0 = 4.1224e-04
Loss = 3.6931e-01, PNorm = 58.1060, GNorm = 2.0618, lr_0 = 4.1196e-04
Loss = 3.5865e-01, PNorm = 58.1096, GNorm = 1.2555, lr_0 = 4.1167e-04
Loss = 3.8153e-01, PNorm = 58.1173, GNorm = 1.1734, lr_0 = 4.1139e-04
Loss = 3.9916e-01, PNorm = 58.1196, GNorm = 1.3581, lr_0 = 4.1111e-04
Loss = 3.6543e-01, PNorm = 58.1215, GNorm = 1.2288, lr_0 = 4.1083e-04
Loss = 4.5888e-01, PNorm = 58.1273, GNorm = 2.6079, lr_0 = 4.1055e-04
Loss = 4.3301e-01, PNorm = 58.1303, GNorm = 1.4780, lr_0 = 4.1027e-04
Loss = 3.7142e-01, PNorm = 58.1425, GNorm = 1.9753, lr_0 = 4.0998e-04
Loss = 4.0820e-01, PNorm = 58.1462, GNorm = 2.2758, lr_0 = 4.0970e-04
Loss = 4.1359e-01, PNorm = 58.1549, GNorm = 1.2052, lr_0 = 4.0942e-04
Loss = 3.9379e-01, PNorm = 58.1605, GNorm = 1.4514, lr_0 = 4.0914e-04
Loss = 4.2912e-01, PNorm = 58.1705, GNorm = 1.2557, lr_0 = 4.0886e-04
Loss = 4.1074e-01, PNorm = 58.1775, GNorm = 1.4001, lr_0 = 4.0858e-04
Loss = 4.3815e-01, PNorm = 58.1850, GNorm = 1.8856, lr_0 = 4.0830e-04
Loss = 3.2217e-01, PNorm = 58.1867, GNorm = 1.1287, lr_0 = 4.0802e-04
Loss = 4.5354e-01, PNorm = 58.1902, GNorm = 1.6976, lr_0 = 4.0774e-04
Loss = 4.2408e-01, PNorm = 58.2007, GNorm = 1.4761, lr_0 = 4.0746e-04
Loss = 3.9367e-01, PNorm = 58.2041, GNorm = 1.6489, lr_0 = 4.0718e-04
Loss = 3.8494e-01, PNorm = 58.2127, GNorm = 2.2457, lr_0 = 4.0691e-04
Loss = 3.9304e-01, PNorm = 58.2244, GNorm = 1.3560, lr_0 = 4.0663e-04
Loss = 3.8878e-01, PNorm = 58.2281, GNorm = 1.9002, lr_0 = 4.0635e-04
Loss = 4.2489e-01, PNorm = 58.2414, GNorm = 1.2277, lr_0 = 4.0607e-04
Loss = 3.9106e-01, PNorm = 58.2466, GNorm = 1.5351, lr_0 = 4.0579e-04
Loss = 3.8349e-01, PNorm = 58.2547, GNorm = 2.2250, lr_0 = 4.0551e-04
Loss = 4.0526e-01, PNorm = 58.2632, GNorm = 1.9414, lr_0 = 4.0524e-04
Loss = 3.9285e-01, PNorm = 58.2719, GNorm = 1.3501, lr_0 = 4.0496e-04
Loss = 4.2427e-01, PNorm = 58.2787, GNorm = 1.7835, lr_0 = 4.0468e-04
Validation mae = 0.113846
Epoch 13
Loss = 4.0226e-01, PNorm = 58.2845, GNorm = 1.2798, lr_0 = 4.0440e-04
Loss = 3.8357e-01, PNorm = 58.2916, GNorm = 1.2585, lr_0 = 4.0413e-04
Loss = 3.6496e-01, PNorm = 58.2955, GNorm = 1.2831, lr_0 = 4.0385e-04
Loss = 3.6537e-01, PNorm = 58.3033, GNorm = 1.5604, lr_0 = 4.0357e-04
Loss = 4.5184e-01, PNorm = 58.3073, GNorm = 1.3024, lr_0 = 4.0330e-04
Loss = 3.6431e-01, PNorm = 58.3151, GNorm = 1.3121, lr_0 = 4.0302e-04
Loss = 3.7825e-01, PNorm = 58.3189, GNorm = 1.5484, lr_0 = 4.0274e-04
Loss = 3.4155e-01, PNorm = 58.3232, GNorm = 0.8540, lr_0 = 4.0247e-04
Loss = 3.6110e-01, PNorm = 58.3274, GNorm = 1.3229, lr_0 = 4.0219e-04
Loss = 4.6130e-01, PNorm = 58.3311, GNorm = 1.7632, lr_0 = 4.0192e-04
Loss = 4.3347e-01, PNorm = 58.3337, GNorm = 1.3065, lr_0 = 4.0164e-04
Loss = 3.4352e-01, PNorm = 58.3413, GNorm = 1.3013, lr_0 = 4.0137e-04
Loss = 3.7957e-01, PNorm = 58.3478, GNorm = 1.2538, lr_0 = 4.0109e-04
Loss = 3.3728e-01, PNorm = 58.3528, GNorm = 1.0122, lr_0 = 4.0082e-04
Loss = 3.4643e-01, PNorm = 58.3552, GNorm = 1.3165, lr_0 = 4.0054e-04
Loss = 4.1067e-01, PNorm = 58.3612, GNorm = 1.4215, lr_0 = 4.0027e-04
Loss = 3.7567e-01, PNorm = 58.3718, GNorm = 1.5089, lr_0 = 3.9999e-04
Loss = 4.4399e-01, PNorm = 58.3780, GNorm = 1.1972, lr_0 = 3.9972e-04
Loss = 3.6644e-01, PNorm = 58.3810, GNorm = 1.6466, lr_0 = 3.9945e-04
Loss = 3.5221e-01, PNorm = 58.3878, GNorm = 1.2872, lr_0 = 3.9917e-04
Loss = 4.4816e-01, PNorm = 58.3940, GNorm = 2.0587, lr_0 = 3.9890e-04
Loss = 4.2862e-01, PNorm = 58.3976, GNorm = 1.1960, lr_0 = 3.9863e-04
Loss = 3.6699e-01, PNorm = 58.4018, GNorm = 1.6780, lr_0 = 3.9835e-04
Loss = 4.1274e-01, PNorm = 58.4062, GNorm = 2.1133, lr_0 = 3.9808e-04
Loss = 4.1836e-01, PNorm = 58.4140, GNorm = 1.4313, lr_0 = 3.9781e-04
Loss = 3.7417e-01, PNorm = 58.4160, GNorm = 1.8858, lr_0 = 3.9753e-04
Loss = 3.5486e-01, PNorm = 58.4194, GNorm = 1.1036, lr_0 = 3.9726e-04
Loss = 3.5204e-01, PNorm = 58.4281, GNorm = 0.9069, lr_0 = 3.9699e-04
Loss = 3.5405e-01, PNorm = 58.4334, GNorm = 1.2107, lr_0 = 3.9672e-04
Loss = 4.0036e-01, PNorm = 58.4413, GNorm = 1.3853, lr_0 = 3.9645e-04
Loss = 4.1950e-01, PNorm = 58.4460, GNorm = 1.4435, lr_0 = 3.9617e-04
Loss = 4.0277e-01, PNorm = 58.4531, GNorm = 1.1420, lr_0 = 3.9590e-04
Loss = 4.0496e-01, PNorm = 58.4614, GNorm = 1.2602, lr_0 = 3.9563e-04
Loss = 4.3284e-01, PNorm = 58.4715, GNorm = 1.4369, lr_0 = 3.9536e-04
Loss = 3.7894e-01, PNorm = 58.4812, GNorm = 1.0334, lr_0 = 3.9509e-04
Loss = 4.1211e-01, PNorm = 58.4893, GNorm = 1.3700, lr_0 = 3.9482e-04
Loss = 3.3493e-01, PNorm = 58.4904, GNorm = 1.4920, lr_0 = 3.9455e-04
Loss = 3.5191e-01, PNorm = 58.4972, GNorm = 1.0580, lr_0 = 3.9428e-04
Loss = 3.8033e-01, PNorm = 58.5053, GNorm = 1.0999, lr_0 = 3.9401e-04
Loss = 3.7267e-01, PNorm = 58.5116, GNorm = 1.5657, lr_0 = 3.9374e-04
Loss = 4.1726e-01, PNorm = 58.5190, GNorm = 1.1379, lr_0 = 3.9347e-04
Loss = 3.9295e-01, PNorm = 58.5239, GNorm = 1.4991, lr_0 = 3.9320e-04
Loss = 4.0676e-01, PNorm = 58.5318, GNorm = 1.5142, lr_0 = 3.9293e-04
Loss = 3.8712e-01, PNorm = 58.5414, GNorm = 1.2347, lr_0 = 3.9266e-04
Loss = 4.0532e-01, PNorm = 58.5494, GNorm = 1.8726, lr_0 = 3.9239e-04
Loss = 3.7787e-01, PNorm = 58.5513, GNorm = 1.4823, lr_0 = 3.9212e-04
Loss = 3.5697e-01, PNorm = 58.5548, GNorm = 1.2064, lr_0 = 3.9185e-04
Loss = 4.0840e-01, PNorm = 58.5591, GNorm = 1.0574, lr_0 = 3.9159e-04
Loss = 4.3521e-01, PNorm = 58.5630, GNorm = 1.2388, lr_0 = 3.9132e-04
Loss = 3.8657e-01, PNorm = 58.5657, GNorm = 1.2757, lr_0 = 3.9105e-04
Loss = 4.2915e-01, PNorm = 58.5694, GNorm = 1.9573, lr_0 = 3.9078e-04
Loss = 4.0022e-01, PNorm = 58.5758, GNorm = 1.5174, lr_0 = 3.9051e-04
Loss = 4.6726e-01, PNorm = 58.5783, GNorm = 1.0438, lr_0 = 3.9025e-04
Loss = 3.3508e-01, PNorm = 58.5831, GNorm = 1.4338, lr_0 = 3.8998e-04
Loss = 3.7155e-01, PNorm = 58.5864, GNorm = 1.1360, lr_0 = 3.8971e-04
Loss = 4.2362e-01, PNorm = 58.5952, GNorm = 1.1714, lr_0 = 3.8945e-04
Loss = 4.4595e-01, PNorm = 58.6061, GNorm = 1.6051, lr_0 = 3.8918e-04
Loss = 3.8155e-01, PNorm = 58.6104, GNorm = 1.3200, lr_0 = 3.8891e-04
Loss = 4.5872e-01, PNorm = 58.6218, GNorm = 1.2266, lr_0 = 3.8865e-04
Loss = 3.7902e-01, PNorm = 58.6279, GNorm = 1.2651, lr_0 = 3.8838e-04
Loss = 3.6902e-01, PNorm = 58.6371, GNorm = 1.2168, lr_0 = 3.8811e-04
Loss = 3.6640e-01, PNorm = 58.6410, GNorm = 1.0906, lr_0 = 3.8785e-04
Loss = 3.7785e-01, PNorm = 58.6432, GNorm = 1.1026, lr_0 = 3.8758e-04
Loss = 4.3396e-01, PNorm = 58.6513, GNorm = 1.1635, lr_0 = 3.8732e-04
Loss = 3.6653e-01, PNorm = 58.6555, GNorm = 1.0790, lr_0 = 3.8705e-04
Loss = 3.5162e-01, PNorm = 58.6591, GNorm = 1.7296, lr_0 = 3.8679e-04
Loss = 3.9030e-01, PNorm = 58.6678, GNorm = 1.3895, lr_0 = 3.8652e-04
Loss = 3.6879e-01, PNorm = 58.6702, GNorm = 1.3148, lr_0 = 3.8626e-04
Loss = 4.1678e-01, PNorm = 58.6806, GNorm = 1.6690, lr_0 = 3.8599e-04
Loss = 3.9255e-01, PNorm = 58.6878, GNorm = 1.1424, lr_0 = 3.8573e-04
Loss = 4.1113e-01, PNorm = 58.6927, GNorm = 1.0624, lr_0 = 3.8546e-04
Loss = 4.1795e-01, PNorm = 58.6956, GNorm = 1.6021, lr_0 = 3.8520e-04
Loss = 4.0509e-01, PNorm = 58.7029, GNorm = 1.2144, lr_0 = 3.8493e-04
Loss = 4.4714e-01, PNorm = 58.7080, GNorm = 1.7238, lr_0 = 3.8467e-04
Loss = 4.5352e-01, PNorm = 58.7118, GNorm = 2.3080, lr_0 = 3.8441e-04
Loss = 3.9373e-01, PNorm = 58.7123, GNorm = 1.5374, lr_0 = 3.8414e-04
Loss = 4.0040e-01, PNorm = 58.7189, GNorm = 1.5074, lr_0 = 3.8388e-04
Loss = 3.3891e-01, PNorm = 58.7261, GNorm = 1.4379, lr_0 = 3.8362e-04
Loss = 4.4128e-01, PNorm = 58.7321, GNorm = 2.4180, lr_0 = 3.8336e-04
Loss = 3.8167e-01, PNorm = 58.7446, GNorm = 1.3853, lr_0 = 3.8309e-04
Loss = 4.4549e-01, PNorm = 58.7531, GNorm = 1.6694, lr_0 = 3.8283e-04
Loss = 4.4656e-01, PNorm = 58.7589, GNorm = 2.3147, lr_0 = 3.8257e-04
Loss = 4.2037e-01, PNorm = 58.7655, GNorm = 1.2139, lr_0 = 3.8231e-04
Loss = 4.4106e-01, PNorm = 58.7704, GNorm = 1.6940, lr_0 = 3.8204e-04
Loss = 4.4179e-01, PNorm = 58.7803, GNorm = 1.5919, lr_0 = 3.8178e-04
Loss = 3.6653e-01, PNorm = 58.7878, GNorm = 1.3862, lr_0 = 3.8152e-04
Loss = 3.6415e-01, PNorm = 58.7894, GNorm = 1.2103, lr_0 = 3.8126e-04
Loss = 4.0457e-01, PNorm = 58.7946, GNorm = 1.1570, lr_0 = 3.8100e-04
Loss = 3.8713e-01, PNorm = 58.7996, GNorm = 1.2737, lr_0 = 3.8074e-04
Loss = 3.9499e-01, PNorm = 58.8056, GNorm = 1.4738, lr_0 = 3.8048e-04
Loss = 4.0365e-01, PNorm = 58.8107, GNorm = 1.4619, lr_0 = 3.8022e-04
Loss = 3.6240e-01, PNorm = 58.8173, GNorm = 1.8722, lr_0 = 3.7995e-04
Loss = 3.7202e-01, PNorm = 58.8213, GNorm = 1.5805, lr_0 = 3.7969e-04
Loss = 4.0293e-01, PNorm = 58.8221, GNorm = 1.6432, lr_0 = 3.7943e-04
Loss = 3.8535e-01, PNorm = 58.8267, GNorm = 1.7118, lr_0 = 3.7917e-04
Loss = 4.3716e-01, PNorm = 58.8328, GNorm = 2.3993, lr_0 = 3.7891e-04
Loss = 3.6271e-01, PNorm = 58.8394, GNorm = 1.4044, lr_0 = 3.7866e-04
Loss = 3.4091e-01, PNorm = 58.8444, GNorm = 1.8925, lr_0 = 3.7840e-04
Loss = 3.6906e-01, PNorm = 58.8507, GNorm = 1.1923, lr_0 = 3.7814e-04
Loss = 3.9825e-01, PNorm = 58.8574, GNorm = 1.2552, lr_0 = 3.7788e-04
Loss = 3.6203e-01, PNorm = 58.8575, GNorm = 1.3351, lr_0 = 3.7762e-04
Loss = 4.2770e-01, PNorm = 58.8636, GNorm = 1.0458, lr_0 = 3.7736e-04
Loss = 4.2193e-01, PNorm = 58.8716, GNorm = 1.1310, lr_0 = 3.7710e-04
Loss = 3.5564e-01, PNorm = 58.8779, GNorm = 1.7668, lr_0 = 3.7684e-04
Loss = 3.8619e-01, PNorm = 58.8823, GNorm = 1.6060, lr_0 = 3.7659e-04
Loss = 4.6028e-01, PNorm = 58.8897, GNorm = 1.4402, lr_0 = 3.7633e-04
Loss = 3.8806e-01, PNorm = 58.8943, GNorm = 1.1288, lr_0 = 3.7607e-04
Loss = 4.1538e-01, PNorm = 58.8998, GNorm = 1.7956, lr_0 = 3.7581e-04
Loss = 4.0742e-01, PNorm = 58.9052, GNorm = 1.1094, lr_0 = 3.7555e-04
Loss = 3.6702e-01, PNorm = 58.9125, GNorm = 1.6220, lr_0 = 3.7530e-04
Loss = 3.7585e-01, PNorm = 58.9144, GNorm = 1.6235, lr_0 = 3.7504e-04
Loss = 3.6984e-01, PNorm = 58.9216, GNorm = 1.9004, lr_0 = 3.7478e-04
Loss = 3.8353e-01, PNorm = 58.9270, GNorm = 1.6041, lr_0 = 3.7453e-04
Loss = 3.8213e-01, PNorm = 58.9297, GNorm = 1.1276, lr_0 = 3.7427e-04
Loss = 3.6929e-01, PNorm = 58.9360, GNorm = 1.4667, lr_0 = 3.7401e-04
Loss = 3.5545e-01, PNorm = 58.9427, GNorm = 1.3310, lr_0 = 3.7376e-04
Loss = 4.3538e-01, PNorm = 58.9450, GNorm = 1.1589, lr_0 = 3.7350e-04
Loss = 4.2539e-01, PNorm = 58.9477, GNorm = 1.1118, lr_0 = 3.7325e-04
Loss = 3.7903e-01, PNorm = 58.9524, GNorm = 1.6144, lr_0 = 3.7299e-04
Loss = 3.7643e-01, PNorm = 58.9588, GNorm = 1.3143, lr_0 = 3.7273e-04
Validation mae = 0.113455
Epoch 14
Loss = 3.9418e-01, PNorm = 58.9632, GNorm = 1.0871, lr_0 = 3.7248e-04
Loss = 3.6802e-01, PNorm = 58.9710, GNorm = 1.5122, lr_0 = 3.7222e-04
Loss = 3.8289e-01, PNorm = 58.9776, GNorm = 1.3904, lr_0 = 3.7197e-04
Loss = 3.7504e-01, PNorm = 58.9829, GNorm = 1.6900, lr_0 = 3.7171e-04
Loss = 3.4332e-01, PNorm = 58.9843, GNorm = 1.1118, lr_0 = 3.7146e-04
Loss = 3.5977e-01, PNorm = 58.9908, GNorm = 1.0506, lr_0 = 3.7120e-04
Loss = 3.8533e-01, PNorm = 58.9957, GNorm = 1.3274, lr_0 = 3.7095e-04
Loss = 3.7653e-01, PNorm = 59.0028, GNorm = 2.0682, lr_0 = 3.7070e-04
Loss = 4.2477e-01, PNorm = 59.0026, GNorm = 3.2700, lr_0 = 3.7044e-04
Loss = 4.2374e-01, PNorm = 59.0049, GNorm = 1.0571, lr_0 = 3.7019e-04
Loss = 3.9520e-01, PNorm = 59.0081, GNorm = 1.0619, lr_0 = 3.6993e-04
Loss = 5.2341e-01, PNorm = 59.0103, GNorm = 2.1195, lr_0 = 3.6968e-04
Loss = 3.3281e-01, PNorm = 59.0157, GNorm = 1.4985, lr_0 = 3.6943e-04
Loss = 3.6826e-01, PNorm = 59.0190, GNorm = 1.0759, lr_0 = 3.6917e-04
Loss = 3.5588e-01, PNorm = 59.0273, GNorm = 1.4711, lr_0 = 3.6892e-04
Loss = 4.1152e-01, PNorm = 59.0358, GNorm = 1.7721, lr_0 = 3.6867e-04
Loss = 3.9484e-01, PNorm = 59.0429, GNorm = 1.4842, lr_0 = 3.6842e-04
Loss = 3.7929e-01, PNorm = 59.0468, GNorm = 1.5765, lr_0 = 3.6816e-04
Loss = 3.6661e-01, PNorm = 59.0565, GNorm = 1.8657, lr_0 = 3.6791e-04
Loss = 3.5619e-01, PNorm = 59.0582, GNorm = 0.8972, lr_0 = 3.6766e-04
Loss = 4.2634e-01, PNorm = 59.0680, GNorm = 1.2685, lr_0 = 3.6741e-04
Loss = 3.3429e-01, PNorm = 59.0741, GNorm = 1.1462, lr_0 = 3.6716e-04
Loss = 4.0401e-01, PNorm = 59.0783, GNorm = 1.3837, lr_0 = 3.6690e-04
Loss = 4.2703e-01, PNorm = 59.0833, GNorm = 1.3397, lr_0 = 3.6665e-04
Loss = 3.8186e-01, PNorm = 59.0879, GNorm = 1.3302, lr_0 = 3.6640e-04
Loss = 3.4448e-01, PNorm = 59.0895, GNorm = 1.7593, lr_0 = 3.6615e-04
Loss = 3.8723e-01, PNorm = 59.0953, GNorm = 1.2372, lr_0 = 3.6590e-04
Loss = 3.2168e-01, PNorm = 59.1009, GNorm = 1.3904, lr_0 = 3.6565e-04
Loss = 4.1438e-01, PNorm = 59.1074, GNorm = 1.9163, lr_0 = 3.6540e-04
Loss = 4.0012e-01, PNorm = 59.1138, GNorm = 1.8295, lr_0 = 3.6515e-04
Loss = 3.9066e-01, PNorm = 59.1184, GNorm = 1.4472, lr_0 = 3.6490e-04
Loss = 3.8324e-01, PNorm = 59.1243, GNorm = 1.0999, lr_0 = 3.6465e-04
Loss = 3.2741e-01, PNorm = 59.1246, GNorm = 1.5005, lr_0 = 3.6440e-04
Loss = 3.4140e-01, PNorm = 59.1289, GNorm = 1.2268, lr_0 = 3.6415e-04
Loss = 4.0432e-01, PNorm = 59.1360, GNorm = 1.3763, lr_0 = 3.6390e-04
Loss = 4.3489e-01, PNorm = 59.1405, GNorm = 1.8654, lr_0 = 3.6365e-04
Loss = 3.7133e-01, PNorm = 59.1441, GNorm = 1.1779, lr_0 = 3.6340e-04
Loss = 3.7076e-01, PNorm = 59.1476, GNorm = 1.8008, lr_0 = 3.6315e-04
Loss = 4.5248e-01, PNorm = 59.1520, GNorm = 1.4385, lr_0 = 3.6290e-04
Loss = 4.5150e-01, PNorm = 59.1583, GNorm = 1.4832, lr_0 = 3.6266e-04
Loss = 3.7981e-01, PNorm = 59.1673, GNorm = 1.4943, lr_0 = 3.6241e-04
Loss = 4.0280e-01, PNorm = 59.1763, GNorm = 1.4746, lr_0 = 3.6216e-04
Loss = 3.6875e-01, PNorm = 59.1773, GNorm = 1.1516, lr_0 = 3.6191e-04
Loss = 4.0474e-01, PNorm = 59.1775, GNorm = 1.2227, lr_0 = 3.6166e-04
Loss = 3.6336e-01, PNorm = 59.1856, GNorm = 0.9874, lr_0 = 3.6141e-04
Loss = 3.7412e-01, PNorm = 59.1943, GNorm = 1.4472, lr_0 = 3.6117e-04
Loss = 4.5636e-01, PNorm = 59.2064, GNorm = 2.7143, lr_0 = 3.6092e-04
Loss = 4.1151e-01, PNorm = 59.2121, GNorm = 1.6190, lr_0 = 3.6067e-04
Loss = 4.3261e-01, PNorm = 59.2172, GNorm = 1.3065, lr_0 = 3.6043e-04
Loss = 4.0621e-01, PNorm = 59.2209, GNorm = 2.1919, lr_0 = 3.6018e-04
Loss = 4.2182e-01, PNorm = 59.2232, GNorm = 1.3176, lr_0 = 3.5993e-04
Loss = 3.8930e-01, PNorm = 59.2241, GNorm = 1.6797, lr_0 = 3.5969e-04
Loss = 4.1397e-01, PNorm = 59.2274, GNorm = 1.4489, lr_0 = 3.5944e-04
Loss = 3.8752e-01, PNorm = 59.2343, GNorm = 0.8656, lr_0 = 3.5919e-04
Loss = 3.5732e-01, PNorm = 59.2415, GNorm = 2.3538, lr_0 = 3.5895e-04
Loss = 4.0221e-01, PNorm = 59.2458, GNorm = 1.4181, lr_0 = 3.5870e-04
Loss = 4.0679e-01, PNorm = 59.2489, GNorm = 1.3387, lr_0 = 3.5845e-04
Loss = 3.7055e-01, PNorm = 59.2518, GNorm = 1.7233, lr_0 = 3.5821e-04
Loss = 4.0241e-01, PNorm = 59.2541, GNorm = 1.4500, lr_0 = 3.5796e-04
Loss = 3.8540e-01, PNorm = 59.2577, GNorm = 1.3267, lr_0 = 3.5772e-04
Loss = 3.9255e-01, PNorm = 59.2639, GNorm = 1.7430, lr_0 = 3.5747e-04
Loss = 3.6760e-01, PNorm = 59.2655, GNorm = 1.2466, lr_0 = 3.5723e-04
Loss = 4.0260e-01, PNorm = 59.2696, GNorm = 1.3475, lr_0 = 3.5698e-04
Loss = 3.6535e-01, PNorm = 59.2764, GNorm = 1.5555, lr_0 = 3.5674e-04
Loss = 3.9017e-01, PNorm = 59.2856, GNorm = 0.9613, lr_0 = 3.5650e-04
Loss = 3.4077e-01, PNorm = 59.2909, GNorm = 0.9197, lr_0 = 3.5625e-04
Loss = 4.1541e-01, PNorm = 59.2972, GNorm = 1.2337, lr_0 = 3.5601e-04
Loss = 4.0426e-01, PNorm = 59.3035, GNorm = 1.7234, lr_0 = 3.5576e-04
Loss = 3.5714e-01, PNorm = 59.3075, GNorm = 1.9946, lr_0 = 3.5552e-04
Loss = 4.0925e-01, PNorm = 59.3142, GNorm = 2.4885, lr_0 = 3.5528e-04
Loss = 4.2687e-01, PNorm = 59.3224, GNorm = 1.4529, lr_0 = 3.5503e-04
Loss = 3.7931e-01, PNorm = 59.3276, GNorm = 0.9791, lr_0 = 3.5479e-04
Loss = 3.7635e-01, PNorm = 59.3331, GNorm = 1.7850, lr_0 = 3.5455e-04
Loss = 4.1693e-01, PNorm = 59.3397, GNorm = 1.5298, lr_0 = 3.5430e-04
Loss = 4.3804e-01, PNorm = 59.3392, GNorm = 1.6629, lr_0 = 3.5406e-04
Loss = 3.8190e-01, PNorm = 59.3430, GNorm = 1.0485, lr_0 = 3.5382e-04
Loss = 3.3286e-01, PNorm = 59.3488, GNorm = 1.4159, lr_0 = 3.5358e-04
Loss = 3.5321e-01, PNorm = 59.3524, GNorm = 1.0961, lr_0 = 3.5333e-04
Loss = 4.3386e-01, PNorm = 59.3577, GNorm = 1.8629, lr_0 = 3.5309e-04
Loss = 3.7885e-01, PNorm = 59.3604, GNorm = 1.1928, lr_0 = 3.5285e-04
Loss = 3.4490e-01, PNorm = 59.3695, GNorm = 1.6350, lr_0 = 3.5261e-04
Loss = 3.6595e-01, PNorm = 59.3776, GNorm = 1.1766, lr_0 = 3.5237e-04
Loss = 4.2443e-01, PNorm = 59.3855, GNorm = 1.3562, lr_0 = 3.5212e-04
Loss = 4.0060e-01, PNorm = 59.3844, GNorm = 1.4520, lr_0 = 3.5188e-04
Loss = 4.1125e-01, PNorm = 59.3867, GNorm = 1.0642, lr_0 = 3.5164e-04
Loss = 3.6033e-01, PNorm = 59.3926, GNorm = 1.2785, lr_0 = 3.5140e-04
Loss = 4.3125e-01, PNorm = 59.3950, GNorm = 1.3441, lr_0 = 3.5116e-04
Loss = 4.0610e-01, PNorm = 59.4015, GNorm = 1.6662, lr_0 = 3.5092e-04
Loss = 3.8850e-01, PNorm = 59.4120, GNorm = 1.8828, lr_0 = 3.5068e-04
Loss = 3.8670e-01, PNorm = 59.4176, GNorm = 1.3770, lr_0 = 3.5044e-04
Loss = 3.4445e-01, PNorm = 59.4187, GNorm = 1.2000, lr_0 = 3.5020e-04
Loss = 3.6267e-01, PNorm = 59.4173, GNorm = 1.2641, lr_0 = 3.4996e-04
Loss = 4.3531e-01, PNorm = 59.4191, GNorm = 1.3421, lr_0 = 3.4972e-04
Loss = 3.7772e-01, PNorm = 59.4272, GNorm = 0.9421, lr_0 = 3.4948e-04
Loss = 3.4952e-01, PNorm = 59.4293, GNorm = 1.3321, lr_0 = 3.4924e-04
Loss = 4.1244e-01, PNorm = 59.4333, GNorm = 1.2332, lr_0 = 3.4900e-04
Loss = 4.2416e-01, PNorm = 59.4336, GNorm = 1.1466, lr_0 = 3.4876e-04
Loss = 4.1336e-01, PNorm = 59.4382, GNorm = 1.4639, lr_0 = 3.4852e-04
Loss = 4.0840e-01, PNorm = 59.4466, GNorm = 1.2208, lr_0 = 3.4828e-04
Loss = 4.6473e-01, PNorm = 59.4523, GNorm = 2.1220, lr_0 = 3.4805e-04
Loss = 3.6422e-01, PNorm = 59.4590, GNorm = 1.1062, lr_0 = 3.4781e-04
Loss = 3.8696e-01, PNorm = 59.4663, GNorm = 1.5179, lr_0 = 3.4757e-04
Loss = 3.9572e-01, PNorm = 59.4696, GNorm = 1.3601, lr_0 = 3.4733e-04
Loss = 3.7288e-01, PNorm = 59.4742, GNorm = 1.7813, lr_0 = 3.4709e-04
Loss = 4.1415e-01, PNorm = 59.4775, GNorm = 1.0799, lr_0 = 3.4686e-04
Loss = 4.1032e-01, PNorm = 59.4825, GNorm = 1.0773, lr_0 = 3.4662e-04
Loss = 4.1027e-01, PNorm = 59.4836, GNorm = 1.3918, lr_0 = 3.4638e-04
Loss = 3.7062e-01, PNorm = 59.4896, GNorm = 1.2157, lr_0 = 3.4614e-04
Loss = 3.7585e-01, PNorm = 59.4956, GNorm = 1.3514, lr_0 = 3.4591e-04
Loss = 4.0152e-01, PNorm = 59.5000, GNorm = 1.5058, lr_0 = 3.4567e-04
Loss = 3.7225e-01, PNorm = 59.5041, GNorm = 1.5202, lr_0 = 3.4543e-04
Loss = 3.9976e-01, PNorm = 59.5082, GNorm = 1.5975, lr_0 = 3.4520e-04
Loss = 4.0100e-01, PNorm = 59.5126, GNorm = 3.3074, lr_0 = 3.4496e-04
Loss = 4.0940e-01, PNorm = 59.5143, GNorm = 1.1575, lr_0 = 3.4472e-04
Loss = 4.2518e-01, PNorm = 59.5239, GNorm = 1.5899, lr_0 = 3.4449e-04
Loss = 3.5409e-01, PNorm = 59.5281, GNorm = 1.0136, lr_0 = 3.4425e-04
Loss = 4.2149e-01, PNorm = 59.5302, GNorm = 1.5937, lr_0 = 3.4402e-04
Loss = 3.8852e-01, PNorm = 59.5381, GNorm = 1.0355, lr_0 = 3.4378e-04
Loss = 3.4382e-01, PNorm = 59.5426, GNorm = 1.2213, lr_0 = 3.4354e-04
Loss = 4.7431e-01, PNorm = 59.5456, GNorm = 1.3106, lr_0 = 3.4331e-04
Validation mae = 0.112685
Epoch 15
Loss = 4.0508e-01, PNorm = 59.5497, GNorm = 1.0851, lr_0 = 3.4307e-04
Loss = 3.5067e-01, PNorm = 59.5582, GNorm = 1.1647, lr_0 = 3.4284e-04
Loss = 3.8120e-01, PNorm = 59.5662, GNorm = 1.2138, lr_0 = 3.4260e-04
Loss = 3.7211e-01, PNorm = 59.5708, GNorm = 1.3897, lr_0 = 3.4237e-04
Loss = 3.9258e-01, PNorm = 59.5740, GNorm = 1.3256, lr_0 = 3.4213e-04
Loss = 4.1825e-01, PNorm = 59.5757, GNorm = 1.1205, lr_0 = 3.4190e-04
Loss = 3.7146e-01, PNorm = 59.5843, GNorm = 2.3206, lr_0 = 3.4167e-04
Loss = 3.6426e-01, PNorm = 59.5881, GNorm = 1.9607, lr_0 = 3.4143e-04
Loss = 3.2797e-01, PNorm = 59.5986, GNorm = 0.9341, lr_0 = 3.4120e-04
Loss = 3.9375e-01, PNorm = 59.6026, GNorm = 1.1558, lr_0 = 3.4096e-04
Loss = 3.9598e-01, PNorm = 59.6099, GNorm = 1.1283, lr_0 = 3.4073e-04
Loss = 4.5623e-01, PNorm = 59.6122, GNorm = 2.1221, lr_0 = 3.4050e-04
Loss = 3.6932e-01, PNorm = 59.6194, GNorm = 1.2328, lr_0 = 3.4026e-04
Loss = 3.4289e-01, PNorm = 59.6321, GNorm = 1.1207, lr_0 = 3.4003e-04
Loss = 3.3696e-01, PNorm = 59.6359, GNorm = 1.4651, lr_0 = 3.3980e-04
Loss = 3.9033e-01, PNorm = 59.6386, GNorm = 1.5998, lr_0 = 3.3956e-04
Loss = 3.9395e-01, PNorm = 59.6443, GNorm = 1.7312, lr_0 = 3.3933e-04
Loss = 3.9015e-01, PNorm = 59.6528, GNorm = 1.5389, lr_0 = 3.3910e-04
Loss = 3.8092e-01, PNorm = 59.6631, GNorm = 1.5861, lr_0 = 3.3887e-04
Loss = 4.3107e-01, PNorm = 59.6709, GNorm = 1.1203, lr_0 = 3.3864e-04
Loss = 4.3844e-01, PNorm = 59.6772, GNorm = 1.4565, lr_0 = 3.3840e-04
Loss = 3.5602e-01, PNorm = 59.6851, GNorm = 2.0852, lr_0 = 3.3817e-04
Loss = 4.0795e-01, PNorm = 59.6828, GNorm = 2.1241, lr_0 = 3.3794e-04
Loss = 3.4321e-01, PNorm = 59.6858, GNorm = 1.2501, lr_0 = 3.3771e-04
Loss = 3.7814e-01, PNorm = 59.6855, GNorm = 1.2109, lr_0 = 3.3748e-04
Loss = 3.8105e-01, PNorm = 59.6908, GNorm = 1.4847, lr_0 = 3.3725e-04
Loss = 3.9363e-01, PNorm = 59.6987, GNorm = 1.1545, lr_0 = 3.3701e-04
Loss = 3.6491e-01, PNorm = 59.7048, GNorm = 1.5602, lr_0 = 3.3678e-04
Loss = 3.8648e-01, PNorm = 59.7071, GNorm = 1.4323, lr_0 = 3.3655e-04
Loss = 4.2106e-01, PNorm = 59.7133, GNorm = 1.5957, lr_0 = 3.3632e-04
Loss = 3.5101e-01, PNorm = 59.7124, GNorm = 1.3773, lr_0 = 3.3609e-04
Loss = 3.9100e-01, PNorm = 59.7196, GNorm = 1.2964, lr_0 = 3.3586e-04
Loss = 3.8802e-01, PNorm = 59.7235, GNorm = 1.7563, lr_0 = 3.3563e-04
Loss = 3.4537e-01, PNorm = 59.7257, GNorm = 1.0128, lr_0 = 3.3540e-04
Loss = 3.3148e-01, PNorm = 59.7307, GNorm = 1.5062, lr_0 = 3.3517e-04
Loss = 4.2390e-01, PNorm = 59.7328, GNorm = 1.4001, lr_0 = 3.3494e-04
Loss = 3.7730e-01, PNorm = 59.7328, GNorm = 1.1996, lr_0 = 3.3471e-04
Loss = 4.0702e-01, PNorm = 59.7367, GNorm = 1.6662, lr_0 = 3.3448e-04
Loss = 4.3428e-01, PNorm = 59.7402, GNorm = 1.7104, lr_0 = 3.3425e-04
Loss = 3.7884e-01, PNorm = 59.7472, GNorm = 1.6863, lr_0 = 3.3403e-04
Loss = 3.9007e-01, PNorm = 59.7540, GNorm = 0.8295, lr_0 = 3.3380e-04
Loss = 3.7884e-01, PNorm = 59.7557, GNorm = 1.3502, lr_0 = 3.3357e-04
Loss = 3.6281e-01, PNorm = 59.7632, GNorm = 1.4336, lr_0 = 3.3334e-04
Loss = 4.4266e-01, PNorm = 59.7687, GNorm = 1.7855, lr_0 = 3.3311e-04
Loss = 3.8818e-01, PNorm = 59.7765, GNorm = 2.0381, lr_0 = 3.3288e-04
Loss = 4.2009e-01, PNorm = 59.7771, GNorm = 1.5439, lr_0 = 3.3265e-04
Loss = 3.6282e-01, PNorm = 59.7836, GNorm = 1.3739, lr_0 = 3.3243e-04
Loss = 4.2618e-01, PNorm = 59.7879, GNorm = 1.3276, lr_0 = 3.3220e-04
Loss = 4.2513e-01, PNorm = 59.7919, GNorm = 1.3462, lr_0 = 3.3197e-04
Loss = 3.6796e-01, PNorm = 59.7990, GNorm = 1.3624, lr_0 = 3.3174e-04
Loss = 3.8482e-01, PNorm = 59.8048, GNorm = 1.6333, lr_0 = 3.3152e-04
Loss = 3.5488e-01, PNorm = 59.8089, GNorm = 1.0531, lr_0 = 3.3129e-04
Loss = 4.3293e-01, PNorm = 59.8136, GNorm = 1.6041, lr_0 = 3.3106e-04
Loss = 4.6979e-01, PNorm = 59.8188, GNorm = 1.8563, lr_0 = 3.3084e-04
Loss = 3.5900e-01, PNorm = 59.8222, GNorm = 1.0103, lr_0 = 3.3061e-04
Loss = 4.2092e-01, PNorm = 59.8270, GNorm = 1.3918, lr_0 = 3.3038e-04
Loss = 3.3549e-01, PNorm = 59.8332, GNorm = 1.0942, lr_0 = 3.3016e-04
Loss = 4.0081e-01, PNorm = 59.8360, GNorm = 1.2260, lr_0 = 3.2993e-04
Loss = 3.4476e-01, PNorm = 59.8399, GNorm = 1.1547, lr_0 = 3.2970e-04
Loss = 3.7177e-01, PNorm = 59.8403, GNorm = 1.6194, lr_0 = 3.2948e-04
Loss = 3.7374e-01, PNorm = 59.8456, GNorm = 1.6124, lr_0 = 3.2925e-04
Loss = 3.3811e-01, PNorm = 59.8512, GNorm = 2.3263, lr_0 = 3.2903e-04
Loss = 4.2744e-01, PNorm = 59.8526, GNorm = 1.8699, lr_0 = 3.2880e-04
Loss = 3.9486e-01, PNorm = 59.8605, GNorm = 1.5258, lr_0 = 3.2858e-04
Loss = 3.7689e-01, PNorm = 59.8637, GNorm = 1.0278, lr_0 = 3.2835e-04
Loss = 3.6909e-01, PNorm = 59.8679, GNorm = 1.5641, lr_0 = 3.2813e-04
Loss = 3.1705e-01, PNorm = 59.8686, GNorm = 1.7833, lr_0 = 3.2790e-04
Loss = 3.3093e-01, PNorm = 59.8710, GNorm = 1.2110, lr_0 = 3.2768e-04
Loss = 3.3326e-01, PNorm = 59.8710, GNorm = 1.6840, lr_0 = 3.2745e-04
Loss = 4.3240e-01, PNorm = 59.8713, GNorm = 1.5482, lr_0 = 3.2723e-04
Loss = 3.5361e-01, PNorm = 59.8785, GNorm = 1.4958, lr_0 = 3.2700e-04
Loss = 3.7172e-01, PNorm = 59.8827, GNorm = 1.8341, lr_0 = 3.2678e-04
Loss = 4.0037e-01, PNorm = 59.8871, GNorm = 2.1294, lr_0 = 3.2656e-04
Loss = 3.5473e-01, PNorm = 59.8909, GNorm = 1.4910, lr_0 = 3.2633e-04
Loss = 4.2724e-01, PNorm = 59.8976, GNorm = 2.1108, lr_0 = 3.2611e-04
Loss = 3.7298e-01, PNorm = 59.9037, GNorm = 1.5052, lr_0 = 3.2589e-04
Loss = 3.7261e-01, PNorm = 59.9043, GNorm = 1.6868, lr_0 = 3.2566e-04
Loss = 3.7351e-01, PNorm = 59.9079, GNorm = 2.2427, lr_0 = 3.2544e-04
Loss = 3.9000e-01, PNorm = 59.9133, GNorm = 1.0665, lr_0 = 3.2522e-04
Loss = 3.4671e-01, PNorm = 59.9191, GNorm = 1.4110, lr_0 = 3.2499e-04
Loss = 3.4235e-01, PNorm = 59.9213, GNorm = 1.3688, lr_0 = 3.2477e-04
Loss = 3.4006e-01, PNorm = 59.9259, GNorm = 1.1910, lr_0 = 3.2455e-04
Loss = 3.7230e-01, PNorm = 59.9294, GNorm = 1.3932, lr_0 = 3.2433e-04
Loss = 3.8871e-01, PNorm = 59.9316, GNorm = 1.2162, lr_0 = 3.2410e-04
Loss = 3.4587e-01, PNorm = 59.9387, GNorm = 1.1867, lr_0 = 3.2388e-04
Loss = 3.7596e-01, PNorm = 59.9456, GNorm = 1.5502, lr_0 = 3.2366e-04
Loss = 3.8876e-01, PNorm = 59.9509, GNorm = 1.3158, lr_0 = 3.2344e-04
Loss = 4.3206e-01, PNorm = 59.9581, GNorm = 1.4195, lr_0 = 3.2322e-04
Loss = 3.7026e-01, PNorm = 59.9642, GNorm = 1.3171, lr_0 = 3.2300e-04
Loss = 3.9321e-01, PNorm = 59.9690, GNorm = 1.5335, lr_0 = 3.2277e-04
Loss = 4.8581e-01, PNorm = 59.9711, GNorm = 2.4617, lr_0 = 3.2255e-04
Loss = 3.7596e-01, PNorm = 59.9747, GNorm = 1.5399, lr_0 = 3.2233e-04
Loss = 4.0327e-01, PNorm = 59.9767, GNorm = 1.3062, lr_0 = 3.2211e-04
Loss = 4.1434e-01, PNorm = 59.9828, GNorm = 1.2193, lr_0 = 3.2189e-04
Loss = 3.9109e-01, PNorm = 59.9912, GNorm = 1.1426, lr_0 = 3.2167e-04
Loss = 3.5427e-01, PNorm = 59.9976, GNorm = 1.8154, lr_0 = 3.2145e-04
Loss = 3.9994e-01, PNorm = 60.0019, GNorm = 1.2755, lr_0 = 3.2123e-04
Loss = 3.5375e-01, PNorm = 60.0079, GNorm = 1.6457, lr_0 = 3.2101e-04
Loss = 3.5053e-01, PNorm = 60.0130, GNorm = 1.0580, lr_0 = 3.2079e-04
Loss = 3.8121e-01, PNorm = 60.0142, GNorm = 1.2654, lr_0 = 3.2057e-04
Loss = 4.1195e-01, PNorm = 60.0182, GNorm = 1.1572, lr_0 = 3.2035e-04
Loss = 3.5998e-01, PNorm = 60.0210, GNorm = 1.2902, lr_0 = 3.2013e-04
Loss = 3.9798e-01, PNorm = 60.0268, GNorm = 1.3575, lr_0 = 3.1991e-04
Loss = 3.7244e-01, PNorm = 60.0291, GNorm = 1.2714, lr_0 = 3.1969e-04
Loss = 4.2001e-01, PNorm = 60.0344, GNorm = 1.2779, lr_0 = 3.1947e-04
Loss = 3.9433e-01, PNorm = 60.0420, GNorm = 1.4881, lr_0 = 3.1925e-04
Loss = 3.8160e-01, PNorm = 60.0455, GNorm = 1.7513, lr_0 = 3.1904e-04
Loss = 4.0014e-01, PNorm = 60.0521, GNorm = 1.4586, lr_0 = 3.1882e-04
Loss = 3.6160e-01, PNorm = 60.0552, GNorm = 1.0696, lr_0 = 3.1860e-04
Loss = 3.7226e-01, PNorm = 60.0603, GNorm = 2.1614, lr_0 = 3.1838e-04
Loss = 3.8401e-01, PNorm = 60.0586, GNorm = 1.5336, lr_0 = 3.1816e-04
Loss = 3.9494e-01, PNorm = 60.0631, GNorm = 1.2632, lr_0 = 3.1794e-04
Loss = 3.7737e-01, PNorm = 60.0660, GNorm = 1.9131, lr_0 = 3.1773e-04
Loss = 3.4356e-01, PNorm = 60.0693, GNorm = 1.7213, lr_0 = 3.1751e-04
Loss = 3.9996e-01, PNorm = 60.0720, GNorm = 2.1857, lr_0 = 3.1729e-04
Loss = 3.7236e-01, PNorm = 60.0714, GNorm = 1.3501, lr_0 = 3.1707e-04
Loss = 3.7787e-01, PNorm = 60.0769, GNorm = 1.9434, lr_0 = 3.1686e-04
Loss = 4.7368e-01, PNorm = 60.0793, GNorm = 1.3017, lr_0 = 3.1664e-04
Loss = 3.8926e-01, PNorm = 60.0839, GNorm = 1.4051, lr_0 = 3.1642e-04
Loss = 4.3299e-01, PNorm = 60.0893, GNorm = 1.8650, lr_0 = 3.1621e-04
Validation mae = 0.112135
Epoch 16
Loss = 3.5124e-01, PNorm = 60.0965, GNorm = 1.6792, lr_0 = 3.1599e-04
Loss = 3.8766e-01, PNorm = 60.1002, GNorm = 1.2627, lr_0 = 3.1577e-04
Loss = 3.6909e-01, PNorm = 60.1049, GNorm = 1.2791, lr_0 = 3.1556e-04
Loss = 3.9912e-01, PNorm = 60.1049, GNorm = 1.1957, lr_0 = 3.1534e-04
Loss = 3.7415e-01, PNorm = 60.1088, GNorm = 1.1281, lr_0 = 3.1512e-04
Loss = 3.8289e-01, PNorm = 60.1147, GNorm = 1.4448, lr_0 = 3.1491e-04
Loss = 3.3799e-01, PNorm = 60.1154, GNorm = 1.3950, lr_0 = 3.1469e-04
Loss = 3.8629e-01, PNorm = 60.1179, GNorm = 1.3622, lr_0 = 3.1448e-04
Loss = 3.7412e-01, PNorm = 60.1244, GNorm = 1.3880, lr_0 = 3.1426e-04
Loss = 3.5629e-01, PNorm = 60.1311, GNorm = 1.7477, lr_0 = 3.1405e-04
Loss = 4.1043e-01, PNorm = 60.1331, GNorm = 1.4098, lr_0 = 3.1383e-04
Loss = 3.9136e-01, PNorm = 60.1410, GNorm = 2.4242, lr_0 = 3.1362e-04
Loss = 3.5823e-01, PNorm = 60.1453, GNorm = 1.8195, lr_0 = 3.1340e-04
Loss = 4.2637e-01, PNorm = 60.1516, GNorm = 1.3949, lr_0 = 3.1319e-04
Loss = 3.8994e-01, PNorm = 60.1562, GNorm = 1.6612, lr_0 = 3.1297e-04
Loss = 3.6333e-01, PNorm = 60.1572, GNorm = 1.0220, lr_0 = 3.1276e-04
Loss = 4.2789e-01, PNorm = 60.1601, GNorm = 1.6285, lr_0 = 3.1254e-04
Loss = 3.4113e-01, PNorm = 60.1648, GNorm = 2.0171, lr_0 = 3.1233e-04
Loss = 3.8610e-01, PNorm = 60.1701, GNorm = 1.1246, lr_0 = 3.1212e-04
Loss = 3.4989e-01, PNorm = 60.1758, GNorm = 1.5669, lr_0 = 3.1190e-04
Loss = 4.0194e-01, PNorm = 60.1781, GNorm = 1.1519, lr_0 = 3.1169e-04
Loss = 4.1304e-01, PNorm = 60.1797, GNorm = 1.2680, lr_0 = 3.1147e-04
Loss = 3.9983e-01, PNorm = 60.1847, GNorm = 1.5325, lr_0 = 3.1126e-04
Loss = 3.4217e-01, PNorm = 60.1948, GNorm = 2.1271, lr_0 = 3.1105e-04
Loss = 4.2680e-01, PNorm = 60.1968, GNorm = 1.3967, lr_0 = 3.1083e-04
Loss = 4.0873e-01, PNorm = 60.2009, GNorm = 1.3716, lr_0 = 3.1062e-04
Loss = 3.4001e-01, PNorm = 60.2038, GNorm = 1.1550, lr_0 = 3.1041e-04
Loss = 4.0168e-01, PNorm = 60.2072, GNorm = 1.8795, lr_0 = 3.1020e-04
Loss = 3.5207e-01, PNorm = 60.2102, GNorm = 1.3928, lr_0 = 3.0998e-04
Loss = 3.7330e-01, PNorm = 60.2105, GNorm = 1.8652, lr_0 = 3.0977e-04
Loss = 3.8288e-01, PNorm = 60.2132, GNorm = 0.9191, lr_0 = 3.0956e-04
Loss = 3.8410e-01, PNorm = 60.2190, GNorm = 1.5260, lr_0 = 3.0935e-04
Loss = 4.1719e-01, PNorm = 60.2248, GNorm = 1.9711, lr_0 = 3.0914e-04
Loss = 3.5391e-01, PNorm = 60.2304, GNorm = 1.3827, lr_0 = 3.0892e-04
Loss = 4.0761e-01, PNorm = 60.2302, GNorm = 1.6437, lr_0 = 3.0871e-04
Loss = 3.7360e-01, PNorm = 60.2359, GNorm = 1.3103, lr_0 = 3.0850e-04
Loss = 3.4118e-01, PNorm = 60.2446, GNorm = 1.3366, lr_0 = 3.0829e-04
Loss = 3.6793e-01, PNorm = 60.2518, GNorm = 2.2888, lr_0 = 3.0808e-04
Loss = 3.8437e-01, PNorm = 60.2582, GNorm = 1.2917, lr_0 = 3.0787e-04
Loss = 3.4592e-01, PNorm = 60.2628, GNorm = 1.2401, lr_0 = 3.0766e-04
Loss = 4.3542e-01, PNorm = 60.2671, GNorm = 1.1705, lr_0 = 3.0745e-04
Loss = 4.3666e-01, PNorm = 60.2735, GNorm = 2.6266, lr_0 = 3.0723e-04
Loss = 3.6900e-01, PNorm = 60.2783, GNorm = 1.7092, lr_0 = 3.0702e-04
Loss = 3.8963e-01, PNorm = 60.2847, GNorm = 1.8777, lr_0 = 3.0681e-04
Loss = 3.6652e-01, PNorm = 60.2875, GNorm = 1.2991, lr_0 = 3.0660e-04
Loss = 3.9781e-01, PNorm = 60.2879, GNorm = 1.4900, lr_0 = 3.0639e-04
Loss = 4.1583e-01, PNorm = 60.2931, GNorm = 1.2109, lr_0 = 3.0618e-04
Loss = 3.7800e-01, PNorm = 60.2935, GNorm = 1.0413, lr_0 = 3.0597e-04
Loss = 3.5284e-01, PNorm = 60.2991, GNorm = 1.4633, lr_0 = 3.0576e-04
Loss = 3.7535e-01, PNorm = 60.3035, GNorm = 1.6507, lr_0 = 3.0555e-04
Loss = 3.3113e-01, PNorm = 60.3067, GNorm = 1.6096, lr_0 = 3.0535e-04
Loss = 4.1890e-01, PNorm = 60.3109, GNorm = 1.2729, lr_0 = 3.0514e-04
Loss = 4.1895e-01, PNorm = 60.3139, GNorm = 1.4383, lr_0 = 3.0493e-04
Loss = 3.7516e-01, PNorm = 60.3181, GNorm = 1.4098, lr_0 = 3.0472e-04
Loss = 3.4984e-01, PNorm = 60.3211, GNorm = 1.0457, lr_0 = 3.0451e-04
Loss = 3.6306e-01, PNorm = 60.3232, GNorm = 1.6766, lr_0 = 3.0430e-04
Loss = 3.6562e-01, PNorm = 60.3243, GNorm = 1.1776, lr_0 = 3.0409e-04
Loss = 4.1291e-01, PNorm = 60.3278, GNorm = 1.3538, lr_0 = 3.0388e-04
Loss = 4.3188e-01, PNorm = 60.3328, GNorm = 1.3002, lr_0 = 3.0368e-04
Loss = 3.7632e-01, PNorm = 60.3327, GNorm = 1.3450, lr_0 = 3.0347e-04
Loss = 4.0962e-01, PNorm = 60.3379, GNorm = 1.4974, lr_0 = 3.0326e-04
Loss = 3.6549e-01, PNorm = 60.3428, GNorm = 1.3219, lr_0 = 3.0305e-04
Loss = 3.5854e-01, PNorm = 60.3458, GNorm = 1.5760, lr_0 = 3.0284e-04
Loss = 3.5775e-01, PNorm = 60.3468, GNorm = 1.0599, lr_0 = 3.0264e-04
Loss = 3.7017e-01, PNorm = 60.3521, GNorm = 1.5175, lr_0 = 3.0243e-04
Loss = 3.6651e-01, PNorm = 60.3555, GNorm = 1.4847, lr_0 = 3.0222e-04
Loss = 3.8500e-01, PNorm = 60.3623, GNorm = 1.5109, lr_0 = 3.0202e-04
Loss = 3.2732e-01, PNorm = 60.3667, GNorm = 1.4450, lr_0 = 3.0181e-04
Loss = 4.1489e-01, PNorm = 60.3703, GNorm = 1.8011, lr_0 = 3.0160e-04
Loss = 3.7254e-01, PNorm = 60.3751, GNorm = 1.2417, lr_0 = 3.0140e-04
Loss = 3.8502e-01, PNorm = 60.3753, GNorm = 1.4457, lr_0 = 3.0119e-04
Loss = 3.5013e-01, PNorm = 60.3768, GNorm = 1.4726, lr_0 = 3.0098e-04
Loss = 3.8657e-01, PNorm = 60.3807, GNorm = 1.7381, lr_0 = 3.0078e-04
Loss = 3.7690e-01, PNorm = 60.3826, GNorm = 1.3649, lr_0 = 3.0057e-04
Loss = 4.2041e-01, PNorm = 60.3865, GNorm = 1.3584, lr_0 = 3.0036e-04
Loss = 3.9793e-01, PNorm = 60.3911, GNorm = 1.4712, lr_0 = 3.0016e-04
Loss = 3.2712e-01, PNorm = 60.3963, GNorm = 1.4768, lr_0 = 2.9995e-04
Loss = 3.6902e-01, PNorm = 60.4017, GNorm = 1.0919, lr_0 = 2.9975e-04
Loss = 4.0169e-01, PNorm = 60.4064, GNorm = 2.2506, lr_0 = 2.9954e-04
Loss = 4.5339e-01, PNorm = 60.4114, GNorm = 1.3328, lr_0 = 2.9934e-04
Loss = 3.5120e-01, PNorm = 60.4139, GNorm = 1.2715, lr_0 = 2.9913e-04
Loss = 4.0174e-01, PNorm = 60.4155, GNorm = 1.0437, lr_0 = 2.9893e-04
Loss = 3.8973e-01, PNorm = 60.4166, GNorm = 1.5749, lr_0 = 2.9872e-04
Loss = 4.2284e-01, PNorm = 60.4218, GNorm = 1.2649, lr_0 = 2.9852e-04
Loss = 3.5379e-01, PNorm = 60.4247, GNorm = 1.2531, lr_0 = 2.9831e-04
Loss = 4.1605e-01, PNorm = 60.4257, GNorm = 2.8183, lr_0 = 2.9811e-04
Loss = 3.7617e-01, PNorm = 60.4265, GNorm = 2.3018, lr_0 = 2.9790e-04
Loss = 3.7873e-01, PNorm = 60.4322, GNorm = 1.8087, lr_0 = 2.9770e-04
Loss = 3.2777e-01, PNorm = 60.4349, GNorm = 1.7722, lr_0 = 2.9750e-04
Loss = 3.7061e-01, PNorm = 60.4389, GNorm = 1.3612, lr_0 = 2.9729e-04
Loss = 3.8715e-01, PNorm = 60.4393, GNorm = 1.7171, lr_0 = 2.9709e-04
Loss = 3.8947e-01, PNorm = 60.4436, GNorm = 1.8510, lr_0 = 2.9689e-04
Loss = 3.2762e-01, PNorm = 60.4508, GNorm = 1.1408, lr_0 = 2.9668e-04
Loss = 3.8920e-01, PNorm = 60.4529, GNorm = 1.6411, lr_0 = 2.9648e-04
Loss = 3.5784e-01, PNorm = 60.4577, GNorm = 1.2747, lr_0 = 2.9628e-04
Loss = 3.8049e-01, PNorm = 60.4598, GNorm = 1.0743, lr_0 = 2.9607e-04
Loss = 3.7316e-01, PNorm = 60.4659, GNorm = 1.3493, lr_0 = 2.9587e-04
Loss = 3.7161e-01, PNorm = 60.4655, GNorm = 1.2079, lr_0 = 2.9567e-04
Loss = 3.8353e-01, PNorm = 60.4711, GNorm = 1.0458, lr_0 = 2.9546e-04
Loss = 3.6572e-01, PNorm = 60.4805, GNorm = 1.0858, lr_0 = 2.9526e-04
Loss = 3.5521e-01, PNorm = 60.4856, GNorm = 1.4610, lr_0 = 2.9506e-04
Loss = 3.7603e-01, PNorm = 60.4884, GNorm = 1.1787, lr_0 = 2.9486e-04
Loss = 3.5019e-01, PNorm = 60.4910, GNorm = 2.3209, lr_0 = 2.9466e-04
Loss = 3.7339e-01, PNorm = 60.4943, GNorm = 1.0388, lr_0 = 2.9445e-04
Loss = 3.4909e-01, PNorm = 60.4965, GNorm = 1.8694, lr_0 = 2.9425e-04
Loss = 4.0058e-01, PNorm = 60.4964, GNorm = 1.6819, lr_0 = 2.9405e-04
Loss = 3.9091e-01, PNorm = 60.4983, GNorm = 1.0483, lr_0 = 2.9385e-04
Loss = 4.4013e-01, PNorm = 60.5059, GNorm = 1.7345, lr_0 = 2.9365e-04
Loss = 4.0113e-01, PNorm = 60.5135, GNorm = 1.2665, lr_0 = 2.9345e-04
Loss = 3.8953e-01, PNorm = 60.5171, GNorm = 1.4950, lr_0 = 2.9325e-04
Loss = 3.0830e-01, PNorm = 60.5203, GNorm = 1.3213, lr_0 = 2.9305e-04
Loss = 3.6648e-01, PNorm = 60.5214, GNorm = 1.2645, lr_0 = 2.9284e-04
Loss = 4.4228e-01, PNorm = 60.5247, GNorm = 1.6833, lr_0 = 2.9264e-04
Loss = 3.7867e-01, PNorm = 60.5314, GNorm = 1.5544, lr_0 = 2.9244e-04
Loss = 3.8551e-01, PNorm = 60.5342, GNorm = 1.5227, lr_0 = 2.9224e-04
Loss = 3.8487e-01, PNorm = 60.5365, GNorm = 1.2601, lr_0 = 2.9204e-04
Loss = 3.6787e-01, PNorm = 60.5442, GNorm = 1.2459, lr_0 = 2.9184e-04
Loss = 4.5430e-01, PNorm = 60.5515, GNorm = 2.5233, lr_0 = 2.9164e-04
Loss = 3.6579e-01, PNorm = 60.5570, GNorm = 1.4019, lr_0 = 2.9144e-04
Loss = 3.5907e-01, PNorm = 60.5607, GNorm = 2.3030, lr_0 = 2.9124e-04
Validation mae = 0.113310
Epoch 17
Loss = 4.0367e-01, PNorm = 60.5676, GNorm = 1.4461, lr_0 = 2.9104e-04
Loss = 3.9900e-01, PNorm = 60.5679, GNorm = 1.3642, lr_0 = 2.9084e-04
Loss = 3.7460e-01, PNorm = 60.5764, GNorm = 1.5288, lr_0 = 2.9065e-04
Loss = 3.3501e-01, PNorm = 60.5840, GNorm = 1.7413, lr_0 = 2.9045e-04
Loss = 3.6899e-01, PNorm = 60.5861, GNorm = 1.4353, lr_0 = 2.9025e-04
Loss = 3.4492e-01, PNorm = 60.5892, GNorm = 1.9311, lr_0 = 2.9005e-04
Loss = 3.4562e-01, PNorm = 60.5935, GNorm = 1.4605, lr_0 = 2.8985e-04
Loss = 3.5379e-01, PNorm = 60.5964, GNorm = 2.2309, lr_0 = 2.8965e-04
Loss = 3.6487e-01, PNorm = 60.5978, GNorm = 1.3590, lr_0 = 2.8945e-04
Loss = 3.8257e-01, PNorm = 60.6020, GNorm = 2.1213, lr_0 = 2.8925e-04
Loss = 3.6391e-01, PNorm = 60.6089, GNorm = 1.2898, lr_0 = 2.8906e-04
Loss = 3.9060e-01, PNorm = 60.6172, GNorm = 1.7115, lr_0 = 2.8886e-04
Loss = 3.9004e-01, PNorm = 60.6173, GNorm = 1.2104, lr_0 = 2.8866e-04
Loss = 3.8450e-01, PNorm = 60.6201, GNorm = 2.0102, lr_0 = 2.8846e-04
Loss = 3.4052e-01, PNorm = 60.6213, GNorm = 1.1645, lr_0 = 2.8826e-04
Loss = 3.8838e-01, PNorm = 60.6251, GNorm = 1.5840, lr_0 = 2.8807e-04
Loss = 3.3953e-01, PNorm = 60.6314, GNorm = 0.9546, lr_0 = 2.8787e-04
Loss = 3.4822e-01, PNorm = 60.6357, GNorm = 1.4436, lr_0 = 2.8767e-04
Loss = 4.0911e-01, PNorm = 60.6375, GNorm = 1.7363, lr_0 = 2.8748e-04
Loss = 4.0850e-01, PNorm = 60.6434, GNorm = 1.2598, lr_0 = 2.8728e-04
Loss = 3.2400e-01, PNorm = 60.6455, GNorm = 0.9810, lr_0 = 2.8708e-04
Loss = 3.4738e-01, PNorm = 60.6460, GNorm = 1.3040, lr_0 = 2.8689e-04
Loss = 3.6434e-01, PNorm = 60.6472, GNorm = 1.9032, lr_0 = 2.8669e-04
Loss = 4.0668e-01, PNorm = 60.6511, GNorm = 1.2292, lr_0 = 2.8649e-04
Loss = 3.6535e-01, PNorm = 60.6533, GNorm = 1.2363, lr_0 = 2.8630e-04
Loss = 4.0023e-01, PNorm = 60.6558, GNorm = 1.7353, lr_0 = 2.8610e-04
Loss = 3.4256e-01, PNorm = 60.6605, GNorm = 1.8783, lr_0 = 2.8590e-04
Loss = 3.9867e-01, PNorm = 60.6624, GNorm = 1.7566, lr_0 = 2.8571e-04
Loss = 3.5346e-01, PNorm = 60.6666, GNorm = 1.5447, lr_0 = 2.8551e-04
Loss = 3.5676e-01, PNorm = 60.6657, GNorm = 1.3657, lr_0 = 2.8532e-04
Loss = 3.9546e-01, PNorm = 60.6679, GNorm = 1.7922, lr_0 = 2.8512e-04
Loss = 4.0552e-01, PNorm = 60.6710, GNorm = 1.4408, lr_0 = 2.8493e-04
Loss = 3.6716e-01, PNorm = 60.6769, GNorm = 1.2232, lr_0 = 2.8473e-04
Loss = 3.6915e-01, PNorm = 60.6821, GNorm = 1.5428, lr_0 = 2.8454e-04
Loss = 3.9764e-01, PNorm = 60.6872, GNorm = 1.3532, lr_0 = 2.8434e-04
Loss = 3.7696e-01, PNorm = 60.6880, GNorm = 1.3261, lr_0 = 2.8415e-04
Loss = 3.6774e-01, PNorm = 60.6888, GNorm = 1.8730, lr_0 = 2.8395e-04
Loss = 4.0632e-01, PNorm = 60.6889, GNorm = 1.6542, lr_0 = 2.8376e-04
Loss = 4.0876e-01, PNorm = 60.6951, GNorm = 1.6263, lr_0 = 2.8356e-04
Loss = 3.4894e-01, PNorm = 60.6972, GNorm = 1.2498, lr_0 = 2.8337e-04
Loss = 2.9018e-01, PNorm = 60.7010, GNorm = 1.0644, lr_0 = 2.8317e-04
Loss = 3.5866e-01, PNorm = 60.7045, GNorm = 1.0487, lr_0 = 2.8298e-04
Loss = 3.6555e-01, PNorm = 60.7089, GNorm = 1.2592, lr_0 = 2.8279e-04
Loss = 3.8939e-01, PNorm = 60.7123, GNorm = 1.5842, lr_0 = 2.8259e-04
Loss = 3.2563e-01, PNorm = 60.7116, GNorm = 1.3646, lr_0 = 2.8240e-04
Loss = 3.6240e-01, PNorm = 60.7118, GNorm = 1.5240, lr_0 = 2.8221e-04
Loss = 3.7988e-01, PNorm = 60.7120, GNorm = 1.7065, lr_0 = 2.8201e-04
Loss = 4.1783e-01, PNorm = 60.7183, GNorm = 2.2262, lr_0 = 2.8182e-04
Loss = 3.5652e-01, PNorm = 60.7291, GNorm = 1.4224, lr_0 = 2.8163e-04
Loss = 4.0915e-01, PNorm = 60.7314, GNorm = 1.8848, lr_0 = 2.8143e-04
Loss = 3.6607e-01, PNorm = 60.7373, GNorm = 1.6503, lr_0 = 2.8124e-04
Loss = 3.6101e-01, PNorm = 60.7416, GNorm = 1.1478, lr_0 = 2.8105e-04
Loss = 3.5102e-01, PNorm = 60.7455, GNorm = 1.1244, lr_0 = 2.8085e-04
Loss = 3.3390e-01, PNorm = 60.7501, GNorm = 1.4718, lr_0 = 2.8066e-04
Loss = 3.6595e-01, PNorm = 60.7494, GNorm = 1.3122, lr_0 = 2.8047e-04
Loss = 3.4833e-01, PNorm = 60.7522, GNorm = 1.5477, lr_0 = 2.8028e-04
Loss = 3.3666e-01, PNorm = 60.7546, GNorm = 1.6088, lr_0 = 2.8009e-04
Loss = 4.0145e-01, PNorm = 60.7585, GNorm = 1.0605, lr_0 = 2.7989e-04
Loss = 4.4772e-01, PNorm = 60.7643, GNorm = 1.6573, lr_0 = 2.7970e-04
Loss = 3.2653e-01, PNorm = 60.7704, GNorm = 1.2100, lr_0 = 2.7951e-04
Loss = 3.7209e-01, PNorm = 60.7707, GNorm = 0.9755, lr_0 = 2.7932e-04
Loss = 3.6983e-01, PNorm = 60.7759, GNorm = 1.4007, lr_0 = 2.7913e-04
Loss = 4.1047e-01, PNorm = 60.7841, GNorm = 2.0085, lr_0 = 2.7894e-04
Loss = 3.3484e-01, PNorm = 60.7852, GNorm = 1.0401, lr_0 = 2.7875e-04
Loss = 3.7793e-01, PNorm = 60.7879, GNorm = 1.4946, lr_0 = 2.7855e-04
Loss = 3.4046e-01, PNorm = 60.7915, GNorm = 1.4352, lr_0 = 2.7836e-04
Loss = 3.4399e-01, PNorm = 60.7938, GNorm = 1.9383, lr_0 = 2.7817e-04
Loss = 4.2560e-01, PNorm = 60.7995, GNorm = 2.3849, lr_0 = 2.7798e-04
Loss = 3.6724e-01, PNorm = 60.7994, GNorm = 1.0994, lr_0 = 2.7779e-04
Loss = 3.6743e-01, PNorm = 60.7985, GNorm = 1.6001, lr_0 = 2.7760e-04
Loss = 3.7738e-01, PNorm = 60.7997, GNorm = 1.4304, lr_0 = 2.7741e-04
Loss = 4.0905e-01, PNorm = 60.8057, GNorm = 1.6071, lr_0 = 2.7722e-04
Loss = 3.4518e-01, PNorm = 60.8132, GNorm = 1.2700, lr_0 = 2.7703e-04
Loss = 3.8952e-01, PNorm = 60.8145, GNorm = 1.9518, lr_0 = 2.7684e-04
Loss = 4.2527e-01, PNorm = 60.8203, GNorm = 1.2023, lr_0 = 2.7665e-04
Loss = 3.8062e-01, PNorm = 60.8261, GNorm = 1.3580, lr_0 = 2.7646e-04
Loss = 4.1650e-01, PNorm = 60.8325, GNorm = 1.9635, lr_0 = 2.7627e-04
Loss = 3.8922e-01, PNorm = 60.8356, GNorm = 2.1891, lr_0 = 2.7608e-04
Loss = 3.9792e-01, PNorm = 60.8413, GNorm = 1.4250, lr_0 = 2.7590e-04
Loss = 4.0617e-01, PNorm = 60.8450, GNorm = 1.4364, lr_0 = 2.7571e-04
Loss = 3.9878e-01, PNorm = 60.8491, GNorm = 1.7758, lr_0 = 2.7552e-04
Loss = 4.3804e-01, PNorm = 60.8563, GNorm = 1.4113, lr_0 = 2.7533e-04
Loss = 3.7072e-01, PNorm = 60.8593, GNorm = 1.5677, lr_0 = 2.7514e-04
Loss = 4.7992e-01, PNorm = 60.8636, GNorm = 1.0891, lr_0 = 2.7495e-04
Loss = 4.2052e-01, PNorm = 60.8679, GNorm = 1.9549, lr_0 = 2.7476e-04
Loss = 3.8740e-01, PNorm = 60.8693, GNorm = 1.3577, lr_0 = 2.7457e-04
Loss = 3.9167e-01, PNorm = 60.8770, GNorm = 1.3556, lr_0 = 2.7439e-04
Loss = 3.7699e-01, PNorm = 60.8801, GNorm = 1.1103, lr_0 = 2.7420e-04
Loss = 3.6200e-01, PNorm = 60.8808, GNorm = 1.4916, lr_0 = 2.7401e-04
Loss = 3.4204e-01, PNorm = 60.8842, GNorm = 1.5169, lr_0 = 2.7382e-04
Loss = 3.6485e-01, PNorm = 60.8882, GNorm = 1.4149, lr_0 = 2.7364e-04
Loss = 4.1253e-01, PNorm = 60.8913, GNorm = 1.6575, lr_0 = 2.7345e-04
Loss = 3.5457e-01, PNorm = 60.8947, GNorm = 1.4206, lr_0 = 2.7326e-04
Loss = 4.0522e-01, PNorm = 60.8983, GNorm = 1.1001, lr_0 = 2.7307e-04
Loss = 3.4488e-01, PNorm = 60.8997, GNorm = 1.7679, lr_0 = 2.7289e-04
Loss = 3.8277e-01, PNorm = 60.9016, GNorm = 1.4529, lr_0 = 2.7270e-04
Loss = 3.7506e-01, PNorm = 60.9041, GNorm = 1.5464, lr_0 = 2.7251e-04
Loss = 3.6908e-01, PNorm = 60.9084, GNorm = 1.4987, lr_0 = 2.7233e-04
Loss = 3.5384e-01, PNorm = 60.9104, GNorm = 1.1555, lr_0 = 2.7214e-04
Loss = 3.8964e-01, PNorm = 60.9155, GNorm = 1.2173, lr_0 = 2.7195e-04
Loss = 4.0776e-01, PNorm = 60.9189, GNorm = 1.6467, lr_0 = 2.7177e-04
Loss = 3.9928e-01, PNorm = 60.9236, GNorm = 1.1924, lr_0 = 2.7158e-04
Loss = 3.4414e-01, PNorm = 60.9246, GNorm = 1.1364, lr_0 = 2.7139e-04
Loss = 3.7394e-01, PNorm = 60.9298, GNorm = 1.8636, lr_0 = 2.7121e-04
Loss = 3.8432e-01, PNorm = 60.9317, GNorm = 1.6181, lr_0 = 2.7102e-04
Loss = 3.7097e-01, PNorm = 60.9385, GNorm = 1.4202, lr_0 = 2.7084e-04
Loss = 3.6437e-01, PNorm = 60.9447, GNorm = 1.2619, lr_0 = 2.7065e-04
Loss = 3.8764e-01, PNorm = 60.9445, GNorm = 0.8342, lr_0 = 2.7047e-04
Loss = 3.6327e-01, PNorm = 60.9442, GNorm = 1.6372, lr_0 = 2.7028e-04
Loss = 3.6837e-01, PNorm = 60.9473, GNorm = 1.4397, lr_0 = 2.7010e-04
Loss = 3.5671e-01, PNorm = 60.9496, GNorm = 1.2148, lr_0 = 2.6991e-04
Loss = 3.6634e-01, PNorm = 60.9515, GNorm = 1.9059, lr_0 = 2.6973e-04
Loss = 4.4199e-01, PNorm = 60.9573, GNorm = 1.7343, lr_0 = 2.6954e-04
Loss = 3.8363e-01, PNorm = 60.9579, GNorm = 1.2423, lr_0 = 2.6936e-04
Loss = 3.5691e-01, PNorm = 60.9598, GNorm = 1.3611, lr_0 = 2.6917e-04
Loss = 3.8269e-01, PNorm = 60.9635, GNorm = 1.2301, lr_0 = 2.6899e-04
Loss = 2.9052e-01, PNorm = 60.9676, GNorm = 1.1600, lr_0 = 2.6880e-04
Loss = 4.0327e-01, PNorm = 60.9676, GNorm = 1.2767, lr_0 = 2.6862e-04
Loss = 4.2280e-01, PNorm = 60.9694, GNorm = 1.8224, lr_0 = 2.6844e-04
Loss = 3.5970e-01, PNorm = 60.9705, GNorm = 1.7351, lr_0 = 2.6825e-04
Validation mae = 0.111683
Epoch 18
Loss = 3.7020e-01, PNorm = 60.9723, GNorm = 0.8805, lr_0 = 2.6807e-04
Loss = 3.3865e-01, PNorm = 60.9743, GNorm = 1.3195, lr_0 = 2.6788e-04
Loss = 3.4190e-01, PNorm = 60.9765, GNorm = 0.9214, lr_0 = 2.6770e-04
Loss = 3.2097e-01, PNorm = 60.9794, GNorm = 0.9428, lr_0 = 2.6752e-04
Loss = 4.1792e-01, PNorm = 60.9794, GNorm = 1.2438, lr_0 = 2.6733e-04
Loss = 3.7143e-01, PNorm = 60.9817, GNorm = 1.2182, lr_0 = 2.6715e-04
Loss = 3.6206e-01, PNorm = 60.9868, GNorm = 1.3032, lr_0 = 2.6697e-04
Loss = 4.0411e-01, PNorm = 60.9916, GNorm = 1.1323, lr_0 = 2.6678e-04
Loss = 3.8839e-01, PNorm = 60.9967, GNorm = 2.6040, lr_0 = 2.6660e-04
Loss = 3.2742e-01, PNorm = 61.0036, GNorm = 1.8402, lr_0 = 2.6642e-04
Loss = 4.1357e-01, PNorm = 61.0021, GNorm = 1.6961, lr_0 = 2.6624e-04
Loss = 3.5535e-01, PNorm = 61.0036, GNorm = 1.3929, lr_0 = 2.6605e-04
Loss = 3.1658e-01, PNorm = 61.0089, GNorm = 1.5554, lr_0 = 2.6587e-04
Loss = 3.8566e-01, PNorm = 61.0149, GNorm = 1.1029, lr_0 = 2.6569e-04
Loss = 3.6637e-01, PNorm = 61.0218, GNorm = 1.3487, lr_0 = 2.6551e-04
Loss = 3.6447e-01, PNorm = 61.0253, GNorm = 1.6540, lr_0 = 2.6533e-04
Loss = 3.6732e-01, PNorm = 61.0297, GNorm = 1.4066, lr_0 = 2.6514e-04
Loss = 3.8234e-01, PNorm = 61.0312, GNorm = 1.8827, lr_0 = 2.6496e-04
Loss = 3.5133e-01, PNorm = 61.0357, GNorm = 1.4821, lr_0 = 2.6478e-04
Loss = 3.7287e-01, PNorm = 61.0370, GNorm = 1.5440, lr_0 = 2.6460e-04
Loss = 3.2174e-01, PNorm = 61.0373, GNorm = 1.7778, lr_0 = 2.6442e-04
Loss = 3.8469e-01, PNorm = 61.0392, GNorm = 1.4259, lr_0 = 2.6424e-04
Loss = 3.2454e-01, PNorm = 61.0424, GNorm = 1.1355, lr_0 = 2.6406e-04
Loss = 3.7595e-01, PNorm = 61.0449, GNorm = 1.3184, lr_0 = 2.6388e-04
Loss = 3.6757e-01, PNorm = 61.0454, GNorm = 1.5544, lr_0 = 2.6369e-04
Loss = 3.0864e-01, PNorm = 61.0494, GNorm = 1.4109, lr_0 = 2.6351e-04
Loss = 3.5856e-01, PNorm = 61.0522, GNorm = 1.6155, lr_0 = 2.6333e-04
Loss = 3.6765e-01, PNorm = 61.0578, GNorm = 1.3803, lr_0 = 2.6315e-04
Loss = 3.5685e-01, PNorm = 61.0645, GNorm = 1.2378, lr_0 = 2.6297e-04
Loss = 3.3210e-01, PNorm = 61.0689, GNorm = 1.2363, lr_0 = 2.6279e-04
Loss = 3.8409e-01, PNorm = 61.0702, GNorm = 1.2387, lr_0 = 2.6261e-04
Loss = 3.6361e-01, PNorm = 61.0743, GNorm = 1.3217, lr_0 = 2.6243e-04
Loss = 4.2263e-01, PNorm = 61.0765, GNorm = 1.3348, lr_0 = 2.6225e-04
Loss = 3.5421e-01, PNorm = 61.0785, GNorm = 1.3589, lr_0 = 2.6207e-04
Loss = 3.8499e-01, PNorm = 61.0810, GNorm = 2.7618, lr_0 = 2.6189e-04
Loss = 3.3261e-01, PNorm = 61.0816, GNorm = 1.2692, lr_0 = 2.6171e-04
Loss = 3.4465e-01, PNorm = 61.0854, GNorm = 1.1910, lr_0 = 2.6153e-04
Loss = 4.0920e-01, PNorm = 61.0870, GNorm = 2.1634, lr_0 = 2.6136e-04
Loss = 3.1864e-01, PNorm = 61.0908, GNorm = 1.5097, lr_0 = 2.6118e-04
Loss = 3.6634e-01, PNorm = 61.0969, GNorm = 1.1148, lr_0 = 2.6100e-04
Loss = 3.5622e-01, PNorm = 61.0947, GNorm = 1.4679, lr_0 = 2.6082e-04
Loss = 4.3829e-01, PNorm = 61.0994, GNorm = 1.4375, lr_0 = 2.6064e-04
Loss = 3.7755e-01, PNorm = 61.1020, GNorm = 0.9743, lr_0 = 2.6046e-04
Loss = 3.4388e-01, PNorm = 61.1050, GNorm = 2.3147, lr_0 = 2.6028e-04
Loss = 3.2839e-01, PNorm = 61.1118, GNorm = 1.1996, lr_0 = 2.6011e-04
Loss = 4.1626e-01, PNorm = 61.1147, GNorm = 1.2870, lr_0 = 2.5993e-04
Loss = 4.3916e-01, PNorm = 61.1185, GNorm = 1.3980, lr_0 = 2.5975e-04
Loss = 3.7565e-01, PNorm = 61.1259, GNorm = 1.9741, lr_0 = 2.5957e-04
Loss = 3.7116e-01, PNorm = 61.1252, GNorm = 1.7402, lr_0 = 2.5939e-04
Loss = 4.4464e-01, PNorm = 61.1274, GNorm = 1.6151, lr_0 = 2.5922e-04
Loss = 3.7931e-01, PNorm = 61.1338, GNorm = 1.4181, lr_0 = 2.5904e-04
Loss = 4.1500e-01, PNorm = 61.1319, GNorm = 2.4789, lr_0 = 2.5886e-04
Loss = 3.9814e-01, PNorm = 61.1364, GNorm = 1.5780, lr_0 = 2.5868e-04
Loss = 3.5174e-01, PNorm = 61.1405, GNorm = 1.1373, lr_0 = 2.5851e-04
Loss = 3.6384e-01, PNorm = 61.1460, GNorm = 1.3491, lr_0 = 2.5833e-04
Loss = 3.2189e-01, PNorm = 61.1498, GNorm = 1.4280, lr_0 = 2.5815e-04
Loss = 3.7089e-01, PNorm = 61.1519, GNorm = 1.3268, lr_0 = 2.5797e-04
Loss = 4.3941e-01, PNorm = 61.1534, GNorm = 2.0923, lr_0 = 2.5780e-04
Loss = 3.6323e-01, PNorm = 61.1579, GNorm = 1.1030, lr_0 = 2.5762e-04
Loss = 3.6842e-01, PNorm = 61.1586, GNorm = 1.8138, lr_0 = 2.5745e-04
Loss = 3.3666e-01, PNorm = 61.1607, GNorm = 0.9268, lr_0 = 2.5727e-04
Loss = 3.4995e-01, PNorm = 61.1624, GNorm = 1.5035, lr_0 = 2.5709e-04
Loss = 3.9092e-01, PNorm = 61.1673, GNorm = 1.2340, lr_0 = 2.5692e-04
Loss = 4.0129e-01, PNorm = 61.1714, GNorm = 1.8639, lr_0 = 2.5674e-04
Loss = 3.4204e-01, PNorm = 61.1724, GNorm = 1.2396, lr_0 = 2.5656e-04
Loss = 3.3930e-01, PNorm = 61.1786, GNorm = 1.5038, lr_0 = 2.5639e-04
Loss = 4.0051e-01, PNorm = 61.1813, GNorm = 1.5313, lr_0 = 2.5621e-04
Loss = 3.3863e-01, PNorm = 61.1835, GNorm = 1.1964, lr_0 = 2.5604e-04
Loss = 4.2417e-01, PNorm = 61.1847, GNorm = 1.7227, lr_0 = 2.5586e-04
Loss = 3.4786e-01, PNorm = 61.1909, GNorm = 1.4801, lr_0 = 2.5569e-04
Loss = 4.0423e-01, PNorm = 61.1933, GNorm = 1.4652, lr_0 = 2.5551e-04
Loss = 3.8762e-01, PNorm = 61.1961, GNorm = 1.5428, lr_0 = 2.5534e-04
Loss = 3.8175e-01, PNorm = 61.2007, GNorm = 1.2536, lr_0 = 2.5516e-04
Loss = 3.3967e-01, PNorm = 61.2026, GNorm = 1.1411, lr_0 = 2.5499e-04
Loss = 3.6763e-01, PNorm = 61.2038, GNorm = 1.2562, lr_0 = 2.5481e-04
Loss = 3.6255e-01, PNorm = 61.2035, GNorm = 1.7230, lr_0 = 2.5464e-04
Loss = 4.2147e-01, PNorm = 61.2051, GNorm = 1.2154, lr_0 = 2.5446e-04
Loss = 3.2583e-01, PNorm = 61.2092, GNorm = 1.5987, lr_0 = 2.5429e-04
Loss = 3.5940e-01, PNorm = 61.2128, GNorm = 1.6506, lr_0 = 2.5411e-04
Loss = 3.7250e-01, PNorm = 61.2149, GNorm = 1.4026, lr_0 = 2.5394e-04
Loss = 3.9280e-01, PNorm = 61.2193, GNorm = 1.3141, lr_0 = 2.5377e-04
Loss = 4.1439e-01, PNorm = 61.2221, GNorm = 1.4163, lr_0 = 2.5359e-04
Loss = 3.7968e-01, PNorm = 61.2248, GNorm = 1.3048, lr_0 = 2.5342e-04
Loss = 3.7357e-01, PNorm = 61.2295, GNorm = 1.3610, lr_0 = 2.5325e-04
Loss = 3.6234e-01, PNorm = 61.2323, GNorm = 1.1151, lr_0 = 2.5307e-04
Loss = 3.6128e-01, PNorm = 61.2390, GNorm = 1.6187, lr_0 = 2.5290e-04
Loss = 4.1226e-01, PNorm = 61.2416, GNorm = 1.4860, lr_0 = 2.5273e-04
Loss = 3.9922e-01, PNorm = 61.2449, GNorm = 1.4033, lr_0 = 2.5255e-04
Loss = 3.6765e-01, PNorm = 61.2498, GNorm = 1.2299, lr_0 = 2.5238e-04
Loss = 3.0774e-01, PNorm = 61.2512, GNorm = 1.1727, lr_0 = 2.5221e-04
Loss = 3.5102e-01, PNorm = 61.2538, GNorm = 1.5244, lr_0 = 2.5203e-04
Loss = 3.9382e-01, PNorm = 61.2562, GNorm = 1.5278, lr_0 = 2.5186e-04
Loss = 3.6405e-01, PNorm = 61.2587, GNorm = 1.2711, lr_0 = 2.5169e-04
Loss = 3.4123e-01, PNorm = 61.2613, GNorm = 1.2567, lr_0 = 2.5152e-04
Loss = 4.1202e-01, PNorm = 61.2623, GNorm = 1.3532, lr_0 = 2.5134e-04
Loss = 3.9195e-01, PNorm = 61.2655, GNorm = 1.1178, lr_0 = 2.5117e-04
Loss = 3.3581e-01, PNorm = 61.2704, GNorm = 1.4362, lr_0 = 2.5100e-04
Loss = 3.6736e-01, PNorm = 61.2733, GNorm = 1.5864, lr_0 = 2.5083e-04
Loss = 3.6019e-01, PNorm = 61.2750, GNorm = 1.4045, lr_0 = 2.5066e-04
Loss = 3.9562e-01, PNorm = 61.2788, GNorm = 1.3910, lr_0 = 2.5048e-04
Loss = 3.6103e-01, PNorm = 61.2814, GNorm = 1.5675, lr_0 = 2.5031e-04
Loss = 4.1166e-01, PNorm = 61.2844, GNorm = 1.0633, lr_0 = 2.5014e-04
Loss = 3.8671e-01, PNorm = 61.2862, GNorm = 1.8555, lr_0 = 2.4997e-04
Loss = 3.1326e-01, PNorm = 61.2879, GNorm = 1.1923, lr_0 = 2.4980e-04
Loss = 3.6337e-01, PNorm = 61.2926, GNorm = 1.3562, lr_0 = 2.4963e-04
Loss = 3.5763e-01, PNorm = 61.2962, GNorm = 1.0926, lr_0 = 2.4946e-04
Loss = 4.0772e-01, PNorm = 61.3029, GNorm = 2.1924, lr_0 = 2.4929e-04
Loss = 3.4744e-01, PNorm = 61.3027, GNorm = 1.2401, lr_0 = 2.4911e-04
Loss = 3.6548e-01, PNorm = 61.3024, GNorm = 1.1292, lr_0 = 2.4894e-04
Loss = 3.8748e-01, PNorm = 61.3053, GNorm = 1.8926, lr_0 = 2.4877e-04
Loss = 3.8406e-01, PNorm = 61.3125, GNorm = 1.0423, lr_0 = 2.4860e-04
Loss = 3.2414e-01, PNorm = 61.3185, GNorm = 1.3868, lr_0 = 2.4843e-04
Loss = 3.4263e-01, PNorm = 61.3182, GNorm = 1.2118, lr_0 = 2.4826e-04
Loss = 3.7508e-01, PNorm = 61.3205, GNorm = 1.6317, lr_0 = 2.4809e-04
Loss = 3.8905e-01, PNorm = 61.3226, GNorm = 1.3465, lr_0 = 2.4792e-04
Loss = 4.1189e-01, PNorm = 61.3235, GNorm = 1.6411, lr_0 = 2.4775e-04
Loss = 3.4151e-01, PNorm = 61.3242, GNorm = 1.6109, lr_0 = 2.4758e-04
Loss = 4.5328e-01, PNorm = 61.3271, GNorm = 1.6767, lr_0 = 2.4741e-04
Loss = 4.3183e-01, PNorm = 61.3301, GNorm = 1.4552, lr_0 = 2.4724e-04
Loss = 3.9545e-01, PNorm = 61.3316, GNorm = 1.2774, lr_0 = 2.4707e-04
Validation mae = 0.113289
Epoch 19
Loss = 3.8257e-01, PNorm = 61.3359, GNorm = 1.8147, lr_0 = 2.4690e-04
Loss = 3.8010e-01, PNorm = 61.3427, GNorm = 1.6562, lr_0 = 2.4674e-04
Loss = 4.0267e-01, PNorm = 61.3477, GNorm = 1.5704, lr_0 = 2.4657e-04
Loss = 3.3977e-01, PNorm = 61.3534, GNorm = 2.0822, lr_0 = 2.4640e-04
Loss = 3.8266e-01, PNorm = 61.3587, GNorm = 1.0530, lr_0 = 2.4623e-04
Loss = 3.9076e-01, PNorm = 61.3592, GNorm = 1.5355, lr_0 = 2.4606e-04
Loss = 3.8076e-01, PNorm = 61.3669, GNorm = 1.3483, lr_0 = 2.4589e-04
Loss = 4.4757e-01, PNorm = 61.3704, GNorm = 1.4988, lr_0 = 2.4572e-04
Loss = 3.3902e-01, PNorm = 61.3783, GNorm = 1.5089, lr_0 = 2.4556e-04
Loss = 3.5778e-01, PNorm = 61.3824, GNorm = 1.6426, lr_0 = 2.4539e-04
Loss = 3.8166e-01, PNorm = 61.3837, GNorm = 1.8733, lr_0 = 2.4522e-04
Loss = 3.6491e-01, PNorm = 61.3842, GNorm = 1.0726, lr_0 = 2.4505e-04
Loss = 3.7862e-01, PNorm = 61.3885, GNorm = 1.3382, lr_0 = 2.4488e-04
Loss = 3.7960e-01, PNorm = 61.3882, GNorm = 1.3400, lr_0 = 2.4472e-04
Loss = 3.6354e-01, PNorm = 61.3903, GNorm = 1.2054, lr_0 = 2.4455e-04
Loss = 3.6890e-01, PNorm = 61.3960, GNorm = 2.0097, lr_0 = 2.4438e-04
Loss = 3.8233e-01, PNorm = 61.4004, GNorm = 1.0413, lr_0 = 2.4421e-04
Loss = 3.8109e-01, PNorm = 61.4035, GNorm = 1.1935, lr_0 = 2.4405e-04
Loss = 3.4199e-01, PNorm = 61.4078, GNorm = 1.1081, lr_0 = 2.4388e-04
Loss = 3.9233e-01, PNorm = 61.4093, GNorm = 1.4927, lr_0 = 2.4371e-04
Loss = 3.1531e-01, PNorm = 61.4138, GNorm = 1.9434, lr_0 = 2.4354e-04
Loss = 3.2571e-01, PNorm = 61.4170, GNorm = 1.0817, lr_0 = 2.4338e-04
Loss = 3.3147e-01, PNorm = 61.4217, GNorm = 1.4791, lr_0 = 2.4321e-04
Loss = 3.5671e-01, PNorm = 61.4246, GNorm = 1.3984, lr_0 = 2.4304e-04
Loss = 3.8767e-01, PNorm = 61.4256, GNorm = 1.7868, lr_0 = 2.4288e-04
Loss = 3.7354e-01, PNorm = 61.4296, GNorm = 2.0468, lr_0 = 2.4271e-04
Loss = 4.2863e-01, PNorm = 61.4304, GNorm = 1.6598, lr_0 = 2.4254e-04
Loss = 3.7657e-01, PNorm = 61.4340, GNorm = 1.4434, lr_0 = 2.4238e-04
Loss = 4.2068e-01, PNorm = 61.4402, GNorm = 1.9641, lr_0 = 2.4221e-04
Loss = 3.7210e-01, PNorm = 61.4414, GNorm = 1.5141, lr_0 = 2.4205e-04
Loss = 3.4199e-01, PNorm = 61.4454, GNorm = 1.2492, lr_0 = 2.4188e-04
Loss = 3.5297e-01, PNorm = 61.4483, GNorm = 2.0233, lr_0 = 2.4171e-04
Loss = 2.9232e-01, PNorm = 61.4495, GNorm = 1.0892, lr_0 = 2.4155e-04
Loss = 3.9428e-01, PNorm = 61.4515, GNorm = 1.0259, lr_0 = 2.4138e-04
Loss = 3.5662e-01, PNorm = 61.4545, GNorm = 1.2982, lr_0 = 2.4122e-04
Loss = 4.5045e-01, PNorm = 61.4546, GNorm = 1.1973, lr_0 = 2.4105e-04
Loss = 3.3738e-01, PNorm = 61.4595, GNorm = 1.7770, lr_0 = 2.4089e-04
Loss = 3.6652e-01, PNorm = 61.4612, GNorm = 1.1763, lr_0 = 2.4072e-04
Loss = 4.0900e-01, PNorm = 61.4613, GNorm = 1.3213, lr_0 = 2.4056e-04
Loss = 3.2750e-01, PNorm = 61.4647, GNorm = 1.3047, lr_0 = 2.4039e-04
Loss = 3.2766e-01, PNorm = 61.4657, GNorm = 1.6941, lr_0 = 2.4023e-04
Loss = 3.7769e-01, PNorm = 61.4684, GNorm = 1.9212, lr_0 = 2.4006e-04
Loss = 3.5707e-01, PNorm = 61.4735, GNorm = 1.5345, lr_0 = 2.3990e-04
Loss = 3.3910e-01, PNorm = 61.4759, GNorm = 1.3783, lr_0 = 2.3974e-04
Loss = 3.9588e-01, PNorm = 61.4768, GNorm = 1.6693, lr_0 = 2.3957e-04
Loss = 3.5308e-01, PNorm = 61.4781, GNorm = 1.2572, lr_0 = 2.3941e-04
Loss = 3.5083e-01, PNorm = 61.4832, GNorm = 1.3185, lr_0 = 2.3924e-04
Loss = 3.6982e-01, PNorm = 61.4878, GNorm = 1.5604, lr_0 = 2.3908e-04
Loss = 3.9342e-01, PNorm = 61.4884, GNorm = 1.3846, lr_0 = 2.3892e-04
Loss = 3.4880e-01, PNorm = 61.4901, GNorm = 1.4736, lr_0 = 2.3875e-04
Loss = 3.6779e-01, PNorm = 61.4913, GNorm = 1.4188, lr_0 = 2.3859e-04
Loss = 3.4614e-01, PNorm = 61.4920, GNorm = 1.3977, lr_0 = 2.3842e-04
Loss = 3.6164e-01, PNorm = 61.4917, GNorm = 1.1490, lr_0 = 2.3826e-04
Loss = 4.3720e-01, PNorm = 61.4950, GNorm = 2.1095, lr_0 = 2.3810e-04
Loss = 3.2373e-01, PNorm = 61.4967, GNorm = 1.2792, lr_0 = 2.3794e-04
Loss = 3.0954e-01, PNorm = 61.4992, GNorm = 1.6813, lr_0 = 2.3777e-04
Loss = 3.6676e-01, PNorm = 61.5023, GNorm = 1.0979, lr_0 = 2.3761e-04
Loss = 3.5546e-01, PNorm = 61.5051, GNorm = 1.7536, lr_0 = 2.3745e-04
Loss = 3.8738e-01, PNorm = 61.5099, GNorm = 1.2291, lr_0 = 2.3728e-04
Loss = 3.6406e-01, PNorm = 61.5109, GNorm = 1.0687, lr_0 = 2.3712e-04
Loss = 3.7157e-01, PNorm = 61.5138, GNorm = 1.4611, lr_0 = 2.3696e-04
Loss = 3.8507e-01, PNorm = 61.5170, GNorm = 1.6233, lr_0 = 2.3680e-04
Loss = 3.8779e-01, PNorm = 61.5214, GNorm = 1.6499, lr_0 = 2.3663e-04
Loss = 3.4218e-01, PNorm = 61.5270, GNorm = 1.8035, lr_0 = 2.3647e-04
Loss = 4.1104e-01, PNorm = 61.5265, GNorm = 2.0508, lr_0 = 2.3631e-04
Loss = 3.2504e-01, PNorm = 61.5287, GNorm = 1.5531, lr_0 = 2.3615e-04
Loss = 3.7688e-01, PNorm = 61.5306, GNorm = 1.4002, lr_0 = 2.3599e-04
Loss = 3.8319e-01, PNorm = 61.5322, GNorm = 1.4588, lr_0 = 2.3582e-04
Loss = 4.2951e-01, PNorm = 61.5318, GNorm = 1.2275, lr_0 = 2.3566e-04
Loss = 3.2539e-01, PNorm = 61.5364, GNorm = 1.4304, lr_0 = 2.3550e-04
Loss = 3.4999e-01, PNorm = 61.5384, GNorm = 1.2449, lr_0 = 2.3534e-04
Loss = 3.6686e-01, PNorm = 61.5410, GNorm = 1.2827, lr_0 = 2.3518e-04
Loss = 3.1840e-01, PNorm = 61.5430, GNorm = 1.3530, lr_0 = 2.3502e-04
Loss = 3.6161e-01, PNorm = 61.5446, GNorm = 1.1843, lr_0 = 2.3486e-04
Loss = 3.5613e-01, PNorm = 61.5502, GNorm = 1.5243, lr_0 = 2.3470e-04
Loss = 4.0366e-01, PNorm = 61.5498, GNorm = 1.8365, lr_0 = 2.3454e-04
Loss = 3.6050e-01, PNorm = 61.5519, GNorm = 1.3472, lr_0 = 2.3437e-04
Loss = 3.4456e-01, PNorm = 61.5552, GNorm = 1.4277, lr_0 = 2.3421e-04
Loss = 4.1506e-01, PNorm = 61.5574, GNorm = 2.4030, lr_0 = 2.3405e-04
Loss = 3.7996e-01, PNorm = 61.5626, GNorm = 1.2489, lr_0 = 2.3389e-04
Loss = 4.4616e-01, PNorm = 61.5659, GNorm = 1.5262, lr_0 = 2.3373e-04
Loss = 3.9668e-01, PNorm = 61.5699, GNorm = 1.5080, lr_0 = 2.3357e-04
Loss = 4.3337e-01, PNorm = 61.5713, GNorm = 1.8993, lr_0 = 2.3341e-04
Loss = 3.5774e-01, PNorm = 61.5778, GNorm = 1.3831, lr_0 = 2.3325e-04
Loss = 3.2393e-01, PNorm = 61.5806, GNorm = 1.5571, lr_0 = 2.3309e-04
Loss = 4.0277e-01, PNorm = 61.5808, GNorm = 1.7394, lr_0 = 2.3293e-04
Loss = 3.4979e-01, PNorm = 61.5852, GNorm = 1.5192, lr_0 = 2.3277e-04
Loss = 3.8158e-01, PNorm = 61.5890, GNorm = 1.6468, lr_0 = 2.3261e-04
Loss = 3.8577e-01, PNorm = 61.5888, GNorm = 1.4329, lr_0 = 2.3246e-04
Loss = 3.3717e-01, PNorm = 61.5922, GNorm = 1.3621, lr_0 = 2.3230e-04
Loss = 3.4891e-01, PNorm = 61.5957, GNorm = 1.2482, lr_0 = 2.3214e-04
Loss = 3.3474e-01, PNorm = 61.5993, GNorm = 1.5265, lr_0 = 2.3198e-04
Loss = 3.8380e-01, PNorm = 61.6008, GNorm = 1.3970, lr_0 = 2.3182e-04
Loss = 3.7265e-01, PNorm = 61.6058, GNorm = 1.4998, lr_0 = 2.3166e-04
Loss = 4.2333e-01, PNorm = 61.6100, GNorm = 1.3468, lr_0 = 2.3150e-04
Loss = 3.1214e-01, PNorm = 61.6138, GNorm = 1.4062, lr_0 = 2.3134e-04
Loss = 3.7318e-01, PNorm = 61.6156, GNorm = 1.0718, lr_0 = 2.3118e-04
Loss = 3.7621e-01, PNorm = 61.6177, GNorm = 1.5302, lr_0 = 2.3103e-04
Loss = 3.5812e-01, PNorm = 61.6205, GNorm = 1.3610, lr_0 = 2.3087e-04
Loss = 4.0442e-01, PNorm = 61.6245, GNorm = 2.1434, lr_0 = 2.3071e-04
Loss = 3.5181e-01, PNorm = 61.6273, GNorm = 1.9020, lr_0 = 2.3055e-04
Loss = 3.6097e-01, PNorm = 61.6295, GNorm = 1.3053, lr_0 = 2.3039e-04
Loss = 3.9282e-01, PNorm = 61.6310, GNorm = 1.5161, lr_0 = 2.3024e-04
Loss = 3.8344e-01, PNorm = 61.6355, GNorm = 1.2542, lr_0 = 2.3008e-04
Loss = 3.2176e-01, PNorm = 61.6432, GNorm = 1.5007, lr_0 = 2.2992e-04
Loss = 3.1466e-01, PNorm = 61.6472, GNorm = 1.5073, lr_0 = 2.2976e-04
Loss = 3.8214e-01, PNorm = 61.6486, GNorm = 1.2812, lr_0 = 2.2961e-04
Loss = 3.8088e-01, PNorm = 61.6502, GNorm = 1.3979, lr_0 = 2.2945e-04
Loss = 3.6702e-01, PNorm = 61.6515, GNorm = 1.2394, lr_0 = 2.2929e-04
Loss = 3.4578e-01, PNorm = 61.6560, GNorm = 1.0295, lr_0 = 2.2913e-04
Loss = 3.4823e-01, PNorm = 61.6576, GNorm = 1.6440, lr_0 = 2.2898e-04
Loss = 3.4210e-01, PNorm = 61.6608, GNorm = 1.5695, lr_0 = 2.2882e-04
Loss = 3.9892e-01, PNorm = 61.6623, GNorm = 1.4201, lr_0 = 2.2866e-04
Loss = 3.5004e-01, PNorm = 61.6613, GNorm = 1.3942, lr_0 = 2.2851e-04
Loss = 3.7005e-01, PNorm = 61.6619, GNorm = 1.2861, lr_0 = 2.2835e-04
Loss = 3.3611e-01, PNorm = 61.6653, GNorm = 1.3556, lr_0 = 2.2819e-04
Loss = 3.6433e-01, PNorm = 61.6703, GNorm = 1.1129, lr_0 = 2.2804e-04
Loss = 3.7124e-01, PNorm = 61.6722, GNorm = 1.6741, lr_0 = 2.2788e-04
Loss = 3.4986e-01, PNorm = 61.6738, GNorm = 1.6071, lr_0 = 2.2773e-04
Loss = 3.5808e-01, PNorm = 61.6775, GNorm = 1.6539, lr_0 = 2.2757e-04
Validation mae = 0.111870
Epoch 20
Loss = 3.8311e-01, PNorm = 61.6795, GNorm = 1.0830, lr_0 = 2.2741e-04
Loss = 3.4194e-01, PNorm = 61.6843, GNorm = 1.3104, lr_0 = 2.2726e-04
Loss = 4.7770e-01, PNorm = 61.6860, GNorm = 1.3628, lr_0 = 2.2710e-04
Loss = 3.4345e-01, PNorm = 61.6863, GNorm = 2.3781, lr_0 = 2.2695e-04
Loss = 3.8241e-01, PNorm = 61.6861, GNorm = 1.3448, lr_0 = 2.2679e-04
Loss = 3.5363e-01, PNorm = 61.6892, GNorm = 1.2503, lr_0 = 2.2664e-04
Loss = 3.3534e-01, PNorm = 61.6912, GNorm = 1.4164, lr_0 = 2.2648e-04
Loss = 4.3167e-01, PNorm = 61.6949, GNorm = 1.7063, lr_0 = 2.2632e-04
Loss = 3.4619e-01, PNorm = 61.6965, GNorm = 1.8424, lr_0 = 2.2617e-04
Loss = 3.3563e-01, PNorm = 61.6987, GNorm = 0.9893, lr_0 = 2.2601e-04
Loss = 3.6230e-01, PNorm = 61.7036, GNorm = 1.0289, lr_0 = 2.2586e-04
Loss = 3.9884e-01, PNorm = 61.7074, GNorm = 1.2044, lr_0 = 2.2571e-04
Loss = 3.5361e-01, PNorm = 61.7098, GNorm = 1.1086, lr_0 = 2.2555e-04
Loss = 3.6740e-01, PNorm = 61.7126, GNorm = 1.6899, lr_0 = 2.2540e-04
Loss = 3.0997e-01, PNorm = 61.7157, GNorm = 1.4122, lr_0 = 2.2524e-04
Loss = 3.6702e-01, PNorm = 61.7170, GNorm = 1.2880, lr_0 = 2.2509e-04
Loss = 3.2130e-01, PNorm = 61.7175, GNorm = 1.8702, lr_0 = 2.2493e-04
Loss = 3.3662e-01, PNorm = 61.7202, GNorm = 1.1593, lr_0 = 2.2478e-04
Loss = 4.0002e-01, PNorm = 61.7208, GNorm = 1.3353, lr_0 = 2.2463e-04
Loss = 3.7717e-01, PNorm = 61.7257, GNorm = 1.4315, lr_0 = 2.2447e-04
Loss = 4.0213e-01, PNorm = 61.7281, GNorm = 1.4708, lr_0 = 2.2432e-04
Loss = 3.9208e-01, PNorm = 61.7279, GNorm = 1.7821, lr_0 = 2.2416e-04
Loss = 3.8330e-01, PNorm = 61.7287, GNorm = 1.1229, lr_0 = 2.2401e-04
Loss = 3.6882e-01, PNorm = 61.7357, GNorm = 1.7003, lr_0 = 2.2386e-04
Loss = 3.4025e-01, PNorm = 61.7369, GNorm = 1.5318, lr_0 = 2.2370e-04
Loss = 3.6990e-01, PNorm = 61.7412, GNorm = 1.2394, lr_0 = 2.2355e-04
Loss = 3.8993e-01, PNorm = 61.7429, GNorm = 1.6441, lr_0 = 2.2340e-04
Loss = 3.5066e-01, PNorm = 61.7454, GNorm = 1.3726, lr_0 = 2.2324e-04
Loss = 3.4254e-01, PNorm = 61.7476, GNorm = 1.1609, lr_0 = 2.2309e-04
Loss = 3.2160e-01, PNorm = 61.7494, GNorm = 1.3254, lr_0 = 2.2294e-04
Loss = 3.7500e-01, PNorm = 61.7513, GNorm = 1.4357, lr_0 = 2.2279e-04
Loss = 3.7392e-01, PNorm = 61.7559, GNorm = 1.7710, lr_0 = 2.2263e-04
Loss = 3.2771e-01, PNorm = 61.7607, GNorm = 1.5197, lr_0 = 2.2248e-04
Loss = 3.3959e-01, PNorm = 61.7634, GNorm = 1.1789, lr_0 = 2.2233e-04
Loss = 3.7929e-01, PNorm = 61.7627, GNorm = 1.2439, lr_0 = 2.2218e-04
Loss = 3.2211e-01, PNorm = 61.7662, GNorm = 1.3680, lr_0 = 2.2202e-04
Loss = 4.0304e-01, PNorm = 61.7664, GNorm = 1.4577, lr_0 = 2.2187e-04
Loss = 3.3596e-01, PNorm = 61.7688, GNorm = 1.4392, lr_0 = 2.2172e-04
Loss = 3.4063e-01, PNorm = 61.7729, GNorm = 1.3101, lr_0 = 2.2157e-04
Loss = 3.7376e-01, PNorm = 61.7706, GNorm = 1.5478, lr_0 = 2.2142e-04
Loss = 3.8828e-01, PNorm = 61.7756, GNorm = 1.7776, lr_0 = 2.2126e-04
Loss = 3.6339e-01, PNorm = 61.7814, GNorm = 1.5458, lr_0 = 2.2111e-04
Loss = 4.0298e-01, PNorm = 61.7874, GNorm = 1.4980, lr_0 = 2.2096e-04
Loss = 3.3388e-01, PNorm = 61.7901, GNorm = 1.3311, lr_0 = 2.2081e-04
Loss = 3.2972e-01, PNorm = 61.7929, GNorm = 1.9038, lr_0 = 2.2066e-04
Loss = 3.4834e-01, PNorm = 61.7921, GNorm = 1.2291, lr_0 = 2.2051e-04
Loss = 3.8336e-01, PNorm = 61.7940, GNorm = 1.2981, lr_0 = 2.2036e-04
Loss = 3.7302e-01, PNorm = 61.7945, GNorm = 1.5270, lr_0 = 2.2021e-04
Loss = 3.9428e-01, PNorm = 61.7959, GNorm = 2.0848, lr_0 = 2.2005e-04
Loss = 3.6296e-01, PNorm = 61.7984, GNorm = 1.2503, lr_0 = 2.1990e-04
Loss = 3.6289e-01, PNorm = 61.8027, GNorm = 1.4074, lr_0 = 2.1975e-04
Loss = 4.1706e-01, PNorm = 61.8100, GNorm = 1.8328, lr_0 = 2.1960e-04
Loss = 3.6042e-01, PNorm = 61.8145, GNorm = 1.0262, lr_0 = 2.1945e-04
Loss = 3.7604e-01, PNorm = 61.8144, GNorm = 1.0845, lr_0 = 2.1930e-04
Loss = 3.6306e-01, PNorm = 61.8188, GNorm = 1.3897, lr_0 = 2.1915e-04
Loss = 3.9094e-01, PNorm = 61.8223, GNorm = 1.1392, lr_0 = 2.1900e-04
Loss = 3.2174e-01, PNorm = 61.8229, GNorm = 1.3468, lr_0 = 2.1885e-04
Loss = 3.7731e-01, PNorm = 61.8245, GNorm = 1.5460, lr_0 = 2.1870e-04
Loss = 3.3143e-01, PNorm = 61.8285, GNorm = 1.5446, lr_0 = 2.1855e-04
Loss = 3.8821e-01, PNorm = 61.8294, GNorm = 2.0894, lr_0 = 2.1840e-04
Loss = 3.8808e-01, PNorm = 61.8313, GNorm = 1.0937, lr_0 = 2.1825e-04
Loss = 3.2925e-01, PNorm = 61.8318, GNorm = 1.5885, lr_0 = 2.1810e-04
Loss = 3.4655e-01, PNorm = 61.8301, GNorm = 1.4251, lr_0 = 2.1795e-04
Loss = 4.1496e-01, PNorm = 61.8328, GNorm = 1.4400, lr_0 = 2.1780e-04
Loss = 3.7632e-01, PNorm = 61.8356, GNorm = 1.2556, lr_0 = 2.1765e-04
Loss = 3.0190e-01, PNorm = 61.8376, GNorm = 1.2133, lr_0 = 2.1751e-04
Loss = 3.4345e-01, PNorm = 61.8408, GNorm = 1.3965, lr_0 = 2.1736e-04
Loss = 3.1859e-01, PNorm = 61.8445, GNorm = 1.0872, lr_0 = 2.1721e-04
Loss = 3.8208e-01, PNorm = 61.8454, GNorm = 1.7030, lr_0 = 2.1706e-04
Loss = 3.3610e-01, PNorm = 61.8460, GNorm = 1.2039, lr_0 = 2.1691e-04
Loss = 3.4623e-01, PNorm = 61.8481, GNorm = 1.5380, lr_0 = 2.1676e-04
Loss = 3.7810e-01, PNorm = 61.8465, GNorm = 2.1562, lr_0 = 2.1661e-04
Loss = 3.2826e-01, PNorm = 61.8500, GNorm = 1.3226, lr_0 = 2.1646e-04
Loss = 3.7127e-01, PNorm = 61.8517, GNorm = 1.7323, lr_0 = 2.1632e-04
Loss = 3.9023e-01, PNorm = 61.8573, GNorm = 1.3662, lr_0 = 2.1617e-04
Loss = 3.2511e-01, PNorm = 61.8614, GNorm = 1.3846, lr_0 = 2.1602e-04
Loss = 3.5284e-01, PNorm = 61.8625, GNorm = 1.0768, lr_0 = 2.1587e-04
Loss = 4.4268e-01, PNorm = 61.8653, GNorm = 1.3917, lr_0 = 2.1572e-04
Loss = 3.7980e-01, PNorm = 61.8693, GNorm = 1.5482, lr_0 = 2.1558e-04
Loss = 3.5579e-01, PNorm = 61.8711, GNorm = 1.6986, lr_0 = 2.1543e-04
Loss = 3.4288e-01, PNorm = 61.8709, GNorm = 1.2266, lr_0 = 2.1528e-04
Loss = 3.9223e-01, PNorm = 61.8706, GNorm = 1.2482, lr_0 = 2.1513e-04
Loss = 4.3521e-01, PNorm = 61.8738, GNorm = 1.9858, lr_0 = 2.1499e-04
Loss = 3.4941e-01, PNorm = 61.8763, GNorm = 1.5467, lr_0 = 2.1484e-04
Loss = 3.9753e-01, PNorm = 61.8776, GNorm = 1.5046, lr_0 = 2.1469e-04
Loss = 4.1878e-01, PNorm = 61.8815, GNorm = 1.3754, lr_0 = 2.1454e-04
Loss = 3.2592e-01, PNorm = 61.8870, GNorm = 1.4773, lr_0 = 2.1440e-04
Loss = 3.5872e-01, PNorm = 61.8874, GNorm = 1.2849, lr_0 = 2.1425e-04
Loss = 4.0929e-01, PNorm = 61.8892, GNorm = 1.0078, lr_0 = 2.1410e-04
Loss = 4.2594e-01, PNorm = 61.8927, GNorm = 1.3101, lr_0 = 2.1396e-04
Loss = 3.4779e-01, PNorm = 61.8923, GNorm = 1.3650, lr_0 = 2.1381e-04
Loss = 3.6090e-01, PNorm = 61.8937, GNorm = 1.9268, lr_0 = 2.1366e-04
Loss = 3.3285e-01, PNorm = 61.8949, GNorm = 0.8324, lr_0 = 2.1352e-04
Loss = 3.5211e-01, PNorm = 61.8988, GNorm = 1.6268, lr_0 = 2.1337e-04
Loss = 3.6571e-01, PNorm = 61.9010, GNorm = 1.8604, lr_0 = 2.1323e-04
Loss = 3.5767e-01, PNorm = 61.9034, GNorm = 1.3423, lr_0 = 2.1308e-04
Loss = 3.5588e-01, PNorm = 61.9066, GNorm = 1.5722, lr_0 = 2.1293e-04
Loss = 3.3060e-01, PNorm = 61.9090, GNorm = 1.1685, lr_0 = 2.1279e-04
Loss = 3.4004e-01, PNorm = 61.9084, GNorm = 1.5808, lr_0 = 2.1264e-04
Loss = 3.1808e-01, PNorm = 61.9099, GNorm = 1.3134, lr_0 = 2.1250e-04
Loss = 4.0493e-01, PNorm = 61.9147, GNorm = 1.9391, lr_0 = 2.1235e-04
Loss = 4.0274e-01, PNorm = 61.9155, GNorm = 1.4643, lr_0 = 2.1221e-04
Loss = 3.4166e-01, PNorm = 61.9154, GNorm = 1.3544, lr_0 = 2.1206e-04
Loss = 3.5348e-01, PNorm = 61.9168, GNorm = 1.2337, lr_0 = 2.1191e-04
Loss = 3.5920e-01, PNorm = 61.9202, GNorm = 1.9856, lr_0 = 2.1177e-04
Loss = 3.9582e-01, PNorm = 61.9231, GNorm = 3.1369, lr_0 = 2.1162e-04
Loss = 3.9875e-01, PNorm = 61.9251, GNorm = 1.8542, lr_0 = 2.1148e-04
Loss = 3.7576e-01, PNorm = 61.9277, GNorm = 1.2944, lr_0 = 2.1133e-04
Loss = 3.8322e-01, PNorm = 61.9319, GNorm = 1.3083, lr_0 = 2.1119e-04
Loss = 3.1721e-01, PNorm = 61.9372, GNorm = 1.2061, lr_0 = 2.1104e-04
Loss = 3.6273e-01, PNorm = 61.9412, GNorm = 1.4427, lr_0 = 2.1090e-04
Loss = 4.3469e-01, PNorm = 61.9407, GNorm = 1.6251, lr_0 = 2.1076e-04
Loss = 3.3930e-01, PNorm = 61.9424, GNorm = 1.5039, lr_0 = 2.1061e-04
Loss = 3.9873e-01, PNorm = 61.9475, GNorm = 1.0502, lr_0 = 2.1047e-04
Loss = 3.8258e-01, PNorm = 61.9458, GNorm = 1.4769, lr_0 = 2.1032e-04
Loss = 4.0985e-01, PNorm = 61.9432, GNorm = 0.9135, lr_0 = 2.1018e-04
Loss = 3.2398e-01, PNorm = 61.9468, GNorm = 0.8684, lr_0 = 2.1003e-04
Loss = 3.5149e-01, PNorm = 61.9486, GNorm = 1.8008, lr_0 = 2.0989e-04
Loss = 3.6185e-01, PNorm = 61.9470, GNorm = 1.5335, lr_0 = 2.0975e-04
Loss = 3.4041e-01, PNorm = 61.9495, GNorm = 1.3619, lr_0 = 2.0960e-04
Validation mae = 0.112146
Epoch 21
Loss = 3.7355e-01, PNorm = 61.9547, GNorm = 1.2530, lr_0 = 2.0946e-04
Loss = 4.3221e-01, PNorm = 61.9557, GNorm = 1.2906, lr_0 = 2.0932e-04
Loss = 4.0017e-01, PNorm = 61.9575, GNorm = 1.3441, lr_0 = 2.0917e-04
Loss = 3.4712e-01, PNorm = 61.9591, GNorm = 1.6753, lr_0 = 2.0903e-04
Loss = 3.7149e-01, PNorm = 61.9623, GNorm = 1.6757, lr_0 = 2.0889e-04
Loss = 3.8954e-01, PNorm = 61.9658, GNorm = 1.2469, lr_0 = 2.0874e-04
Loss = 3.6103e-01, PNorm = 61.9700, GNorm = 1.4895, lr_0 = 2.0860e-04
Loss = 3.9054e-01, PNorm = 61.9748, GNorm = 1.5911, lr_0 = 2.0846e-04
Loss = 3.9396e-01, PNorm = 61.9770, GNorm = 2.2282, lr_0 = 2.0831e-04
Loss = 3.3823e-01, PNorm = 61.9795, GNorm = 1.5221, lr_0 = 2.0817e-04
Loss = 4.2087e-01, PNorm = 61.9827, GNorm = 1.4886, lr_0 = 2.0803e-04
Loss = 3.2764e-01, PNorm = 61.9829, GNorm = 1.8247, lr_0 = 2.0789e-04
Loss = 3.4887e-01, PNorm = 61.9841, GNorm = 1.7196, lr_0 = 2.0774e-04
Loss = 3.6680e-01, PNorm = 61.9863, GNorm = 1.8574, lr_0 = 2.0760e-04
Loss = 3.5335e-01, PNorm = 61.9888, GNorm = 1.2662, lr_0 = 2.0746e-04
Loss = 3.3046e-01, PNorm = 61.9946, GNorm = 1.4024, lr_0 = 2.0732e-04
Loss = 3.5977e-01, PNorm = 61.9972, GNorm = 1.6238, lr_0 = 2.0718e-04
Loss = 3.4619e-01, PNorm = 61.9979, GNorm = 1.2792, lr_0 = 2.0703e-04
Loss = 3.0209e-01, PNorm = 62.0028, GNorm = 1.5871, lr_0 = 2.0689e-04
Loss = 3.4621e-01, PNorm = 62.0040, GNorm = 1.5060, lr_0 = 2.0675e-04
Loss = 3.3133e-01, PNorm = 62.0062, GNorm = 1.5412, lr_0 = 2.0661e-04
Loss = 3.9699e-01, PNorm = 62.0096, GNorm = 1.2490, lr_0 = 2.0647e-04
Loss = 3.7182e-01, PNorm = 62.0104, GNorm = 1.4261, lr_0 = 2.0633e-04
Loss = 3.6664e-01, PNorm = 62.0122, GNorm = 1.5795, lr_0 = 2.0618e-04
Loss = 3.4330e-01, PNorm = 62.0125, GNorm = 1.5109, lr_0 = 2.0604e-04
Loss = 3.3656e-01, PNorm = 62.0161, GNorm = 1.2930, lr_0 = 2.0590e-04
Loss = 3.6835e-01, PNorm = 62.0202, GNorm = 1.5862, lr_0 = 2.0576e-04
Loss = 3.5907e-01, PNorm = 62.0234, GNorm = 1.8363, lr_0 = 2.0562e-04
Loss = 3.1935e-01, PNorm = 62.0278, GNorm = 1.2612, lr_0 = 2.0548e-04
Loss = 4.1629e-01, PNorm = 62.0322, GNorm = 1.8679, lr_0 = 2.0534e-04
Loss = 3.3928e-01, PNorm = 62.0370, GNorm = 1.1778, lr_0 = 2.0520e-04
Loss = 3.5009e-01, PNorm = 62.0376, GNorm = 1.5220, lr_0 = 2.0506e-04
Loss = 3.6642e-01, PNorm = 62.0399, GNorm = 1.5698, lr_0 = 2.0492e-04
Loss = 4.2641e-01, PNorm = 62.0402, GNorm = 1.0768, lr_0 = 2.0478e-04
Loss = 3.6977e-01, PNorm = 62.0438, GNorm = 1.3811, lr_0 = 2.0464e-04
Loss = 3.6254e-01, PNorm = 62.0443, GNorm = 1.5673, lr_0 = 2.0450e-04
Loss = 3.6496e-01, PNorm = 62.0478, GNorm = 1.5650, lr_0 = 2.0436e-04
Loss = 3.8870e-01, PNorm = 62.0505, GNorm = 1.4614, lr_0 = 2.0422e-04
Loss = 3.5926e-01, PNorm = 62.0531, GNorm = 1.5920, lr_0 = 2.0408e-04
Loss = 4.0375e-01, PNorm = 62.0549, GNorm = 1.3033, lr_0 = 2.0394e-04
Loss = 3.7269e-01, PNorm = 62.0570, GNorm = 1.4471, lr_0 = 2.0380e-04
Loss = 3.9524e-01, PNorm = 62.0588, GNorm = 1.4280, lr_0 = 2.0366e-04
Loss = 3.7184e-01, PNorm = 62.0613, GNorm = 1.8445, lr_0 = 2.0352e-04
Loss = 3.3067e-01, PNorm = 62.0644, GNorm = 1.2279, lr_0 = 2.0338e-04
Loss = 3.4825e-01, PNorm = 62.0678, GNorm = 1.6041, lr_0 = 2.0324e-04
Loss = 3.5433e-01, PNorm = 62.0685, GNorm = 1.3850, lr_0 = 2.0310e-04
Loss = 3.7983e-01, PNorm = 62.0728, GNorm = 1.4188, lr_0 = 2.0296e-04
Loss = 3.4980e-01, PNorm = 62.0769, GNorm = 1.5316, lr_0 = 2.0282e-04
Loss = 3.1183e-01, PNorm = 62.0801, GNorm = 1.2753, lr_0 = 2.0268e-04
Loss = 3.6467e-01, PNorm = 62.0839, GNorm = 1.3142, lr_0 = 2.0254e-04
Loss = 3.4131e-01, PNorm = 62.0828, GNorm = 1.3409, lr_0 = 2.0240e-04
Loss = 3.6590e-01, PNorm = 62.0816, GNorm = 1.0961, lr_0 = 2.0227e-04
Loss = 3.4015e-01, PNorm = 62.0819, GNorm = 1.2775, lr_0 = 2.0213e-04
Loss = 3.5982e-01, PNorm = 62.0832, GNorm = 1.3734, lr_0 = 2.0199e-04
Loss = 3.4469e-01, PNorm = 62.0875, GNorm = 1.7363, lr_0 = 2.0185e-04
Loss = 3.4404e-01, PNorm = 62.0906, GNorm = 1.2428, lr_0 = 2.0171e-04
Loss = 3.4621e-01, PNorm = 62.0890, GNorm = 1.3013, lr_0 = 2.0157e-04
Loss = 3.8476e-01, PNorm = 62.0902, GNorm = 1.3754, lr_0 = 2.0144e-04
Loss = 3.8450e-01, PNorm = 62.0917, GNorm = 1.4002, lr_0 = 2.0130e-04
Loss = 3.6467e-01, PNorm = 62.0942, GNorm = 1.4043, lr_0 = 2.0116e-04
Loss = 3.5880e-01, PNorm = 62.0970, GNorm = 2.2359, lr_0 = 2.0102e-04
Loss = 3.7302e-01, PNorm = 62.1005, GNorm = 1.5275, lr_0 = 2.0088e-04
Loss = 3.4926e-01, PNorm = 62.1054, GNorm = 1.4282, lr_0 = 2.0075e-04
Loss = 3.5881e-01, PNorm = 62.1096, GNorm = 1.4094, lr_0 = 2.0061e-04
Loss = 3.7197e-01, PNorm = 62.1117, GNorm = 1.2477, lr_0 = 2.0047e-04
Loss = 3.6018e-01, PNorm = 62.1113, GNorm = 1.1153, lr_0 = 2.0033e-04
Loss = 4.3119e-01, PNorm = 62.1113, GNorm = 1.4652, lr_0 = 2.0020e-04
Loss = 3.9249e-01, PNorm = 62.1141, GNorm = 1.8006, lr_0 = 2.0006e-04
Loss = 3.4734e-01, PNorm = 62.1171, GNorm = 1.1220, lr_0 = 1.9992e-04
Loss = 3.3551e-01, PNorm = 62.1165, GNorm = 1.6157, lr_0 = 1.9979e-04
Loss = 3.9791e-01, PNorm = 62.1169, GNorm = 1.2204, lr_0 = 1.9965e-04
Loss = 3.5836e-01, PNorm = 62.1193, GNorm = 1.9742, lr_0 = 1.9951e-04
Loss = 3.5259e-01, PNorm = 62.1204, GNorm = 1.4623, lr_0 = 1.9938e-04
Loss = 3.2045e-01, PNorm = 62.1235, GNorm = 2.1710, lr_0 = 1.9924e-04
Loss = 3.7165e-01, PNorm = 62.1230, GNorm = 1.1586, lr_0 = 1.9910e-04
Loss = 3.6157e-01, PNorm = 62.1257, GNorm = 1.4456, lr_0 = 1.9897e-04
Loss = 3.2214e-01, PNorm = 62.1277, GNorm = 0.9513, lr_0 = 1.9883e-04
Loss = 3.0322e-01, PNorm = 62.1295, GNorm = 1.4702, lr_0 = 1.9869e-04
Loss = 3.5419e-01, PNorm = 62.1315, GNorm = 1.3420, lr_0 = 1.9856e-04
Loss = 3.2398e-01, PNorm = 62.1351, GNorm = 0.9459, lr_0 = 1.9842e-04
Loss = 3.9447e-01, PNorm = 62.1370, GNorm = 2.0991, lr_0 = 1.9829e-04
Loss = 3.6297e-01, PNorm = 62.1386, GNorm = 1.7016, lr_0 = 1.9815e-04
Loss = 3.8425e-01, PNorm = 62.1392, GNorm = 1.4279, lr_0 = 1.9801e-04
Loss = 3.2887e-01, PNorm = 62.1384, GNorm = 1.4526, lr_0 = 1.9788e-04
Loss = 3.6034e-01, PNorm = 62.1387, GNorm = 1.7395, lr_0 = 1.9774e-04
Loss = 3.1132e-01, PNorm = 62.1394, GNorm = 1.3322, lr_0 = 1.9761e-04
Loss = 3.6768e-01, PNorm = 62.1425, GNorm = 1.5972, lr_0 = 1.9747e-04
Loss = 3.7042e-01, PNorm = 62.1440, GNorm = 1.3586, lr_0 = 1.9734e-04
Loss = 3.4876e-01, PNorm = 62.1458, GNorm = 1.5120, lr_0 = 1.9720e-04
Loss = 3.6724e-01, PNorm = 62.1475, GNorm = 1.3902, lr_0 = 1.9707e-04
Loss = 2.8788e-01, PNorm = 62.1524, GNorm = 1.5445, lr_0 = 1.9693e-04
Loss = 3.6226e-01, PNorm = 62.1534, GNorm = 1.2346, lr_0 = 1.9680e-04
Loss = 3.6648e-01, PNorm = 62.1568, GNorm = 1.5983, lr_0 = 1.9666e-04
Loss = 3.5507e-01, PNorm = 62.1611, GNorm = 1.5880, lr_0 = 1.9653e-04
Loss = 3.8494e-01, PNorm = 62.1618, GNorm = 1.2966, lr_0 = 1.9639e-04
Loss = 3.2709e-01, PNorm = 62.1641, GNorm = 1.0350, lr_0 = 1.9626e-04
Loss = 4.3244e-01, PNorm = 62.1669, GNorm = 1.0388, lr_0 = 1.9612e-04
Loss = 3.6596e-01, PNorm = 62.1678, GNorm = 1.3977, lr_0 = 1.9599e-04
Loss = 3.7806e-01, PNorm = 62.1720, GNorm = 1.0405, lr_0 = 1.9585e-04
Loss = 3.8310e-01, PNorm = 62.1783, GNorm = 1.5025, lr_0 = 1.9572e-04
Loss = 4.0694e-01, PNorm = 62.1808, GNorm = 1.2518, lr_0 = 1.9559e-04
Loss = 3.4837e-01, PNorm = 62.1803, GNorm = 1.7129, lr_0 = 1.9545e-04
Loss = 3.8292e-01, PNorm = 62.1830, GNorm = 1.7497, lr_0 = 1.9532e-04
Loss = 3.2797e-01, PNorm = 62.1864, GNorm = 1.2662, lr_0 = 1.9518e-04
Loss = 3.2693e-01, PNorm = 62.1871, GNorm = 1.5085, lr_0 = 1.9505e-04
Loss = 3.6192e-01, PNorm = 62.1871, GNorm = 1.7689, lr_0 = 1.9492e-04
Loss = 3.9784e-01, PNorm = 62.1871, GNorm = 1.2004, lr_0 = 1.9478e-04
Loss = 3.7109e-01, PNorm = 62.1869, GNorm = 1.6069, lr_0 = 1.9465e-04
Loss = 3.6384e-01, PNorm = 62.1890, GNorm = 1.4964, lr_0 = 1.9452e-04
Loss = 3.2788e-01, PNorm = 62.1908, GNorm = 1.2665, lr_0 = 1.9438e-04
Loss = 3.9740e-01, PNorm = 62.1915, GNorm = 1.4556, lr_0 = 1.9425e-04
Loss = 3.6934e-01, PNorm = 62.1927, GNorm = 1.7741, lr_0 = 1.9412e-04
Loss = 3.8143e-01, PNorm = 62.1930, GNorm = 1.9263, lr_0 = 1.9398e-04
Loss = 3.4654e-01, PNorm = 62.1938, GNorm = 1.7227, lr_0 = 1.9385e-04
Loss = 3.3005e-01, PNorm = 62.1981, GNorm = 1.3690, lr_0 = 1.9372e-04
Loss = 3.8954e-01, PNorm = 62.1994, GNorm = 1.0795, lr_0 = 1.9359e-04
Loss = 4.0266e-01, PNorm = 62.1996, GNorm = 1.4817, lr_0 = 1.9345e-04
Loss = 3.6363e-01, PNorm = 62.2028, GNorm = 1.1694, lr_0 = 1.9332e-04
Loss = 3.5374e-01, PNorm = 62.2019, GNorm = 1.5452, lr_0 = 1.9319e-04
Loss = 3.2606e-01, PNorm = 62.2068, GNorm = 1.2583, lr_0 = 1.9306e-04
Validation mae = 0.111655
Epoch 22
Loss = 3.4653e-01, PNorm = 62.2130, GNorm = 1.5022, lr_0 = 1.9292e-04
Loss = 3.7411e-01, PNorm = 62.2126, GNorm = 2.1767, lr_0 = 1.9279e-04
Loss = 3.5744e-01, PNorm = 62.2150, GNorm = 1.1578, lr_0 = 1.9266e-04
Loss = 4.0187e-01, PNorm = 62.2190, GNorm = 1.4260, lr_0 = 1.9253e-04
Loss = 3.8952e-01, PNorm = 62.2220, GNorm = 1.3437, lr_0 = 1.9240e-04
Loss = 3.2349e-01, PNorm = 62.2247, GNorm = 1.3369, lr_0 = 1.9226e-04
Loss = 3.9582e-01, PNorm = 62.2250, GNorm = 1.4548, lr_0 = 1.9213e-04
Loss = 3.2083e-01, PNorm = 62.2258, GNorm = 1.4709, lr_0 = 1.9200e-04
Loss = 3.9863e-01, PNorm = 62.2285, GNorm = 1.8641, lr_0 = 1.9187e-04
Loss = 3.6283e-01, PNorm = 62.2328, GNorm = 1.6600, lr_0 = 1.9174e-04
Loss = 3.6308e-01, PNorm = 62.2348, GNorm = 1.0212, lr_0 = 1.9161e-04
Loss = 3.7065e-01, PNorm = 62.2357, GNorm = 1.4030, lr_0 = 1.9148e-04
Loss = 4.0871e-01, PNorm = 62.2381, GNorm = 1.2501, lr_0 = 1.9134e-04
Loss = 3.2833e-01, PNorm = 62.2424, GNorm = 1.3655, lr_0 = 1.9121e-04
Loss = 3.4966e-01, PNorm = 62.2434, GNorm = 1.0666, lr_0 = 1.9108e-04
Loss = 3.6180e-01, PNorm = 62.2462, GNorm = 1.7359, lr_0 = 1.9095e-04
Loss = 3.4370e-01, PNorm = 62.2489, GNorm = 1.9122, lr_0 = 1.9082e-04
Loss = 3.5754e-01, PNorm = 62.2497, GNorm = 1.3344, lr_0 = 1.9069e-04
Loss = 3.6514e-01, PNorm = 62.2521, GNorm = 1.5577, lr_0 = 1.9056e-04
Loss = 3.5894e-01, PNorm = 62.2556, GNorm = 1.6151, lr_0 = 1.9043e-04
Loss = 3.5571e-01, PNorm = 62.2551, GNorm = 1.3037, lr_0 = 1.9030e-04
Loss = 3.6421e-01, PNorm = 62.2539, GNorm = 1.1942, lr_0 = 1.9017e-04
Loss = 3.5894e-01, PNorm = 62.2554, GNorm = 1.2200, lr_0 = 1.9004e-04
Loss = 3.2880e-01, PNorm = 62.2590, GNorm = 1.4053, lr_0 = 1.8991e-04
Loss = 3.3900e-01, PNorm = 62.2607, GNorm = 2.1889, lr_0 = 1.8978e-04
Loss = 3.6865e-01, PNorm = 62.2645, GNorm = 1.2445, lr_0 = 1.8965e-04
Loss = 3.5077e-01, PNorm = 62.2653, GNorm = 1.3974, lr_0 = 1.8952e-04
Loss = 4.1232e-01, PNorm = 62.2659, GNorm = 1.9247, lr_0 = 1.8939e-04
Loss = 3.3779e-01, PNorm = 62.2678, GNorm = 1.4088, lr_0 = 1.8926e-04
Loss = 3.5681e-01, PNorm = 62.2696, GNorm = 1.6138, lr_0 = 1.8913e-04
Loss = 3.0094e-01, PNorm = 62.2723, GNorm = 1.0753, lr_0 = 1.8900e-04
Loss = 3.5136e-01, PNorm = 62.2736, GNorm = 1.2478, lr_0 = 1.8887e-04
Loss = 3.4524e-01, PNorm = 62.2746, GNorm = 1.3173, lr_0 = 1.8874e-04
Loss = 3.7583e-01, PNorm = 62.2767, GNorm = 1.4207, lr_0 = 1.8861e-04
Loss = 4.1040e-01, PNorm = 62.2782, GNorm = 1.7965, lr_0 = 1.8848e-04
Loss = 3.2755e-01, PNorm = 62.2811, GNorm = 1.5043, lr_0 = 1.8835e-04
Loss = 3.4578e-01, PNorm = 62.2848, GNorm = 1.2665, lr_0 = 1.8822e-04
Loss = 3.9030e-01, PNorm = 62.2860, GNorm = 1.7062, lr_0 = 1.8809e-04
Loss = 3.8033e-01, PNorm = 62.2871, GNorm = 1.5868, lr_0 = 1.8797e-04
Loss = 3.4342e-01, PNorm = 62.2902, GNorm = 1.6937, lr_0 = 1.8784e-04
Loss = 3.3766e-01, PNorm = 62.2934, GNorm = 1.6679, lr_0 = 1.8771e-04
Loss = 3.3172e-01, PNorm = 62.2954, GNorm = 1.4499, lr_0 = 1.8758e-04
Loss = 3.6765e-01, PNorm = 62.2955, GNorm = 1.1672, lr_0 = 1.8745e-04
Loss = 3.2448e-01, PNorm = 62.2974, GNorm = 1.5614, lr_0 = 1.8732e-04
Loss = 3.3557e-01, PNorm = 62.2999, GNorm = 0.9773, lr_0 = 1.8719e-04
Loss = 3.6845e-01, PNorm = 62.3022, GNorm = 1.3001, lr_0 = 1.8707e-04
Loss = 4.1002e-01, PNorm = 62.3031, GNorm = 1.7265, lr_0 = 1.8694e-04
Loss = 3.6207e-01, PNorm = 62.3036, GNorm = 1.5379, lr_0 = 1.8681e-04
Loss = 3.5428e-01, PNorm = 62.3076, GNorm = 1.1742, lr_0 = 1.8668e-04
Loss = 3.1619e-01, PNorm = 62.3087, GNorm = 1.0837, lr_0 = 1.8655e-04
Loss = 3.1920e-01, PNorm = 62.3110, GNorm = 1.4784, lr_0 = 1.8643e-04
Loss = 3.6046e-01, PNorm = 62.3155, GNorm = 1.5931, lr_0 = 1.8630e-04
Loss = 3.2287e-01, PNorm = 62.3175, GNorm = 1.7456, lr_0 = 1.8617e-04
Loss = 3.2539e-01, PNorm = 62.3187, GNorm = 1.8154, lr_0 = 1.8604e-04
Loss = 3.2891e-01, PNorm = 62.3201, GNorm = 1.8582, lr_0 = 1.8592e-04
Loss = 3.5499e-01, PNorm = 62.3192, GNorm = 1.1439, lr_0 = 1.8579e-04
Loss = 3.9303e-01, PNorm = 62.3191, GNorm = 2.2868, lr_0 = 1.8566e-04
Loss = 3.3870e-01, PNorm = 62.3213, GNorm = 1.5336, lr_0 = 1.8553e-04
Loss = 3.7173e-01, PNorm = 62.3213, GNorm = 1.2512, lr_0 = 1.8541e-04
Loss = 3.2160e-01, PNorm = 62.3237, GNorm = 0.8416, lr_0 = 1.8528e-04
Loss = 3.1320e-01, PNorm = 62.3277, GNorm = 2.0535, lr_0 = 1.8515e-04
Loss = 3.4764e-01, PNorm = 62.3296, GNorm = 1.7117, lr_0 = 1.8503e-04
Loss = 3.5488e-01, PNorm = 62.3339, GNorm = 1.5436, lr_0 = 1.8490e-04
Loss = 3.5276e-01, PNorm = 62.3351, GNorm = 1.7153, lr_0 = 1.8477e-04
Loss = 3.6067e-01, PNorm = 62.3364, GNorm = 1.5869, lr_0 = 1.8465e-04
Loss = 3.3373e-01, PNorm = 62.3398, GNorm = 1.6141, lr_0 = 1.8452e-04
Loss = 3.4087e-01, PNorm = 62.3370, GNorm = 1.0959, lr_0 = 1.8439e-04
Loss = 3.9042e-01, PNorm = 62.3393, GNorm = 1.0837, lr_0 = 1.8427e-04
Loss = 3.3589e-01, PNorm = 62.3426, GNorm = 1.5894, lr_0 = 1.8414e-04
Loss = 3.4581e-01, PNorm = 62.3428, GNorm = 1.2728, lr_0 = 1.8401e-04
Loss = 3.3746e-01, PNorm = 62.3433, GNorm = 1.6865, lr_0 = 1.8389e-04
Loss = 3.3899e-01, PNorm = 62.3457, GNorm = 1.0753, lr_0 = 1.8376e-04
Loss = 3.4830e-01, PNorm = 62.3470, GNorm = 1.1398, lr_0 = 1.8364e-04
Loss = 3.8113e-01, PNorm = 62.3471, GNorm = 1.6951, lr_0 = 1.8351e-04
Loss = 3.4567e-01, PNorm = 62.3502, GNorm = 1.1387, lr_0 = 1.8338e-04
Loss = 3.9610e-01, PNorm = 62.3530, GNorm = 1.1398, lr_0 = 1.8326e-04
Loss = 4.0931e-01, PNorm = 62.3523, GNorm = 1.1465, lr_0 = 1.8313e-04
Loss = 3.5091e-01, PNorm = 62.3530, GNorm = 1.6783, lr_0 = 1.8301e-04
Loss = 3.3126e-01, PNorm = 62.3573, GNorm = 1.3219, lr_0 = 1.8288e-04
Loss = 3.9250e-01, PNorm = 62.3612, GNorm = 1.8130, lr_0 = 1.8276e-04
Loss = 3.4286e-01, PNorm = 62.3636, GNorm = 1.8604, lr_0 = 1.8263e-04
Loss = 3.3812e-01, PNorm = 62.3676, GNorm = 1.7975, lr_0 = 1.8251e-04
Loss = 3.4431e-01, PNorm = 62.3684, GNorm = 1.3937, lr_0 = 1.8238e-04
Loss = 3.5543e-01, PNorm = 62.3680, GNorm = 2.0326, lr_0 = 1.8226e-04
Loss = 3.3407e-01, PNorm = 62.3703, GNorm = 1.5865, lr_0 = 1.8213e-04
Loss = 3.6065e-01, PNorm = 62.3729, GNorm = 2.0482, lr_0 = 1.8201e-04
Loss = 3.4993e-01, PNorm = 62.3730, GNorm = 1.1300, lr_0 = 1.8188e-04
Loss = 3.5777e-01, PNorm = 62.3752, GNorm = 1.2972, lr_0 = 1.8176e-04
Loss = 3.6398e-01, PNorm = 62.3787, GNorm = 1.4436, lr_0 = 1.8163e-04
Loss = 3.5430e-01, PNorm = 62.3805, GNorm = 1.6149, lr_0 = 1.8151e-04
Loss = 3.9394e-01, PNorm = 62.3812, GNorm = 1.1611, lr_0 = 1.8138e-04
Loss = 3.6170e-01, PNorm = 62.3835, GNorm = 1.3704, lr_0 = 1.8126e-04
Loss = 3.5118e-01, PNorm = 62.3854, GNorm = 1.3609, lr_0 = 1.8114e-04
Loss = 3.9079e-01, PNorm = 62.3878, GNorm = 1.3559, lr_0 = 1.8101e-04
Loss = 3.6603e-01, PNorm = 62.3910, GNorm = 1.3858, lr_0 = 1.8089e-04
Loss = 3.6396e-01, PNorm = 62.3940, GNorm = 1.0458, lr_0 = 1.8076e-04
Loss = 3.6946e-01, PNorm = 62.3957, GNorm = 1.2612, lr_0 = 1.8064e-04
Loss = 3.4269e-01, PNorm = 62.4001, GNorm = 1.6361, lr_0 = 1.8052e-04
Loss = 3.8704e-01, PNorm = 62.4027, GNorm = 1.1805, lr_0 = 1.8039e-04
Loss = 3.4720e-01, PNorm = 62.4064, GNorm = 1.3414, lr_0 = 1.8027e-04
Loss = 3.8208e-01, PNorm = 62.4088, GNorm = 2.0574, lr_0 = 1.8015e-04
Loss = 3.2223e-01, PNorm = 62.4086, GNorm = 1.2894, lr_0 = 1.8002e-04
Loss = 3.4550e-01, PNorm = 62.4094, GNorm = 1.2805, lr_0 = 1.7990e-04
Loss = 3.0368e-01, PNorm = 62.4135, GNorm = 2.6557, lr_0 = 1.7978e-04
Loss = 4.1833e-01, PNorm = 62.4145, GNorm = 1.4134, lr_0 = 1.7965e-04
Loss = 4.0139e-01, PNorm = 62.4155, GNorm = 1.4159, lr_0 = 1.7953e-04
Loss = 3.6829e-01, PNorm = 62.4156, GNorm = 1.5185, lr_0 = 1.7941e-04
Loss = 3.5218e-01, PNorm = 62.4160, GNorm = 1.2065, lr_0 = 1.7928e-04
Loss = 3.5546e-01, PNorm = 62.4193, GNorm = 1.1299, lr_0 = 1.7916e-04
Loss = 3.5257e-01, PNorm = 62.4216, GNorm = 1.2256, lr_0 = 1.7904e-04
Loss = 3.7410e-01, PNorm = 62.4256, GNorm = 1.4709, lr_0 = 1.7892e-04
Loss = 3.7006e-01, PNorm = 62.4282, GNorm = 1.2534, lr_0 = 1.7879e-04
Loss = 3.5392e-01, PNorm = 62.4299, GNorm = 1.7078, lr_0 = 1.7867e-04
Loss = 3.5507e-01, PNorm = 62.4320, GNorm = 1.7111, lr_0 = 1.7855e-04
Loss = 3.5502e-01, PNorm = 62.4333, GNorm = 1.2770, lr_0 = 1.7843e-04
Loss = 3.6194e-01, PNorm = 62.4360, GNorm = 1.9972, lr_0 = 1.7830e-04
Loss = 3.0943e-01, PNorm = 62.4382, GNorm = 1.5474, lr_0 = 1.7818e-04
Loss = 4.0966e-01, PNorm = 62.4360, GNorm = 1.2920, lr_0 = 1.7806e-04
Loss = 4.3247e-01, PNorm = 62.4372, GNorm = 1.4281, lr_0 = 1.7794e-04
Loss = 3.2505e-01, PNorm = 62.4383, GNorm = 1.6284, lr_0 = 1.7782e-04
Validation mae = 0.111135
Epoch 23
Loss = 2.8991e-01, PNorm = 62.4407, GNorm = 1.1287, lr_0 = 1.7769e-04
Loss = 3.3654e-01, PNorm = 62.4433, GNorm = 1.1992, lr_0 = 1.7757e-04
Loss = 3.7272e-01, PNorm = 62.4452, GNorm = 1.3037, lr_0 = 1.7745e-04
Loss = 3.6373e-01, PNorm = 62.4457, GNorm = 1.4289, lr_0 = 1.7733e-04
Loss = 3.2733e-01, PNorm = 62.4438, GNorm = 1.2243, lr_0 = 1.7721e-04
Loss = 3.4754e-01, PNorm = 62.4465, GNorm = 1.1217, lr_0 = 1.7709e-04
Loss = 3.3801e-01, PNorm = 62.4484, GNorm = 1.3182, lr_0 = 1.7696e-04
Loss = 3.9225e-01, PNorm = 62.4507, GNorm = 1.7164, lr_0 = 1.7684e-04
Loss = 3.4914e-01, PNorm = 62.4527, GNorm = 1.0780, lr_0 = 1.7672e-04
Loss = 3.8518e-01, PNorm = 62.4529, GNorm = 1.4778, lr_0 = 1.7660e-04
Loss = 3.3539e-01, PNorm = 62.4537, GNorm = 1.2970, lr_0 = 1.7648e-04
Loss = 4.5622e-01, PNorm = 62.4541, GNorm = 1.6126, lr_0 = 1.7636e-04
Loss = 4.0367e-01, PNorm = 62.4543, GNorm = 2.1320, lr_0 = 1.7624e-04
Loss = 3.0520e-01, PNorm = 62.4584, GNorm = 1.5859, lr_0 = 1.7612e-04
Loss = 3.6435e-01, PNorm = 62.4617, GNorm = 1.5301, lr_0 = 1.7600e-04
Loss = 3.6414e-01, PNorm = 62.4635, GNorm = 1.3367, lr_0 = 1.7588e-04
Loss = 3.4933e-01, PNorm = 62.4677, GNorm = 1.3477, lr_0 = 1.7576e-04
Loss = 3.8501e-01, PNorm = 62.4687, GNorm = 1.6121, lr_0 = 1.7564e-04
Loss = 3.9924e-01, PNorm = 62.4712, GNorm = 1.4655, lr_0 = 1.7552e-04
Loss = 3.1333e-01, PNorm = 62.4726, GNorm = 1.4899, lr_0 = 1.7540e-04
Loss = 3.5204e-01, PNorm = 62.4737, GNorm = 1.2822, lr_0 = 1.7528e-04
Loss = 3.6134e-01, PNorm = 62.4749, GNorm = 1.3378, lr_0 = 1.7516e-04
Loss = 3.6309e-01, PNorm = 62.4780, GNorm = 1.3976, lr_0 = 1.7504e-04
Loss = 3.3274e-01, PNorm = 62.4810, GNorm = 1.5584, lr_0 = 1.7492e-04
Loss = 3.4416e-01, PNorm = 62.4817, GNorm = 1.4355, lr_0 = 1.7480e-04
Loss = 2.9483e-01, PNorm = 62.4846, GNorm = 1.5005, lr_0 = 1.7468e-04
Loss = 3.4465e-01, PNorm = 62.4859, GNorm = 1.7573, lr_0 = 1.7456e-04
Loss = 3.7391e-01, PNorm = 62.4863, GNorm = 1.3195, lr_0 = 1.7444e-04
Loss = 3.5669e-01, PNorm = 62.4881, GNorm = 1.4253, lr_0 = 1.7432e-04
Loss = 3.3318e-01, PNorm = 62.4895, GNorm = 1.9756, lr_0 = 1.7420e-04
Loss = 3.6608e-01, PNorm = 62.4899, GNorm = 1.5744, lr_0 = 1.7408e-04
Loss = 4.0948e-01, PNorm = 62.4913, GNorm = 1.6541, lr_0 = 1.7396e-04
Loss = 3.7554e-01, PNorm = 62.4937, GNorm = 1.8430, lr_0 = 1.7384e-04
Loss = 3.1964e-01, PNorm = 62.4949, GNorm = 1.3432, lr_0 = 1.7372e-04
Loss = 3.1859e-01, PNorm = 62.4957, GNorm = 1.3217, lr_0 = 1.7360e-04
Loss = 2.9289e-01, PNorm = 62.4966, GNorm = 1.2263, lr_0 = 1.7348e-04
Loss = 3.5330e-01, PNorm = 62.4978, GNorm = 1.6716, lr_0 = 1.7336e-04
Loss = 3.3234e-01, PNorm = 62.5004, GNorm = 2.2050, lr_0 = 1.7325e-04
Loss = 3.3384e-01, PNorm = 62.5006, GNorm = 1.3365, lr_0 = 1.7313e-04
Loss = 3.4539e-01, PNorm = 62.5031, GNorm = 1.2616, lr_0 = 1.7301e-04
Loss = 3.6785e-01, PNorm = 62.5049, GNorm = 2.5326, lr_0 = 1.7289e-04
Loss = 3.5921e-01, PNorm = 62.5040, GNorm = 1.3781, lr_0 = 1.7277e-04
Loss = 3.4522e-01, PNorm = 62.5042, GNorm = 1.0552, lr_0 = 1.7265e-04
Loss = 3.0299e-01, PNorm = 62.5063, GNorm = 1.3651, lr_0 = 1.7253e-04
Loss = 3.4455e-01, PNorm = 62.5073, GNorm = 1.4732, lr_0 = 1.7242e-04
Loss = 3.3917e-01, PNorm = 62.5077, GNorm = 1.1073, lr_0 = 1.7230e-04
Loss = 3.4971e-01, PNorm = 62.5092, GNorm = 1.4551, lr_0 = 1.7218e-04
Loss = 3.6529e-01, PNorm = 62.5119, GNorm = 1.3740, lr_0 = 1.7206e-04
Loss = 3.3496e-01, PNorm = 62.5122, GNorm = 1.2394, lr_0 = 1.7194e-04
Loss = 3.2801e-01, PNorm = 62.5123, GNorm = 1.5786, lr_0 = 1.7183e-04
Loss = 4.2277e-01, PNorm = 62.5171, GNorm = 2.0497, lr_0 = 1.7171e-04
Loss = 3.9272e-01, PNorm = 62.5194, GNorm = 1.3496, lr_0 = 1.7159e-04
Loss = 3.7762e-01, PNorm = 62.5197, GNorm = 1.4248, lr_0 = 1.7147e-04
Loss = 3.2521e-01, PNorm = 62.5207, GNorm = 1.2102, lr_0 = 1.7136e-04
Loss = 4.2073e-01, PNorm = 62.5226, GNorm = 1.7209, lr_0 = 1.7124e-04
Loss = 3.6413e-01, PNorm = 62.5258, GNorm = 1.2675, lr_0 = 1.7112e-04
Loss = 3.1452e-01, PNorm = 62.5279, GNorm = 1.2539, lr_0 = 1.7100e-04
Loss = 3.4528e-01, PNorm = 62.5286, GNorm = 1.2088, lr_0 = 1.7089e-04
Loss = 3.2946e-01, PNorm = 62.5310, GNorm = 1.3078, lr_0 = 1.7077e-04
Loss = 3.2072e-01, PNorm = 62.5316, GNorm = 1.1112, lr_0 = 1.7065e-04
Loss = 3.1426e-01, PNorm = 62.5315, GNorm = 1.4936, lr_0 = 1.7054e-04
Loss = 3.3620e-01, PNorm = 62.5314, GNorm = 1.6459, lr_0 = 1.7042e-04
Loss = 3.8185e-01, PNorm = 62.5314, GNorm = 1.7334, lr_0 = 1.7030e-04
Loss = 3.4586e-01, PNorm = 62.5339, GNorm = 1.2736, lr_0 = 1.7019e-04
Loss = 3.6496e-01, PNorm = 62.5367, GNorm = 1.7208, lr_0 = 1.7007e-04
Loss = 3.0781e-01, PNorm = 62.5363, GNorm = 1.8455, lr_0 = 1.6995e-04
Loss = 3.5176e-01, PNorm = 62.5382, GNorm = 1.4861, lr_0 = 1.6984e-04
Loss = 3.2148e-01, PNorm = 62.5388, GNorm = 1.4884, lr_0 = 1.6972e-04
Loss = 3.5564e-01, PNorm = 62.5373, GNorm = 1.4214, lr_0 = 1.6960e-04
Loss = 3.6965e-01, PNorm = 62.5381, GNorm = 1.4770, lr_0 = 1.6949e-04
Loss = 3.0341e-01, PNorm = 62.5419, GNorm = 1.7437, lr_0 = 1.6937e-04
Loss = 3.7045e-01, PNorm = 62.5427, GNorm = 1.6338, lr_0 = 1.6926e-04
Loss = 4.0284e-01, PNorm = 62.5449, GNorm = 2.3952, lr_0 = 1.6914e-04
Loss = 3.3322e-01, PNorm = 62.5486, GNorm = 1.3751, lr_0 = 1.6902e-04
Loss = 3.8969e-01, PNorm = 62.5511, GNorm = 1.1133, lr_0 = 1.6891e-04
Loss = 4.2955e-01, PNorm = 62.5546, GNorm = 1.5916, lr_0 = 1.6879e-04
Loss = 3.8902e-01, PNorm = 62.5570, GNorm = 1.6553, lr_0 = 1.6868e-04
Loss = 3.1015e-01, PNorm = 62.5597, GNorm = 1.3695, lr_0 = 1.6856e-04
Loss = 3.6580e-01, PNorm = 62.5602, GNorm = 1.4243, lr_0 = 1.6845e-04
Loss = 3.5454e-01, PNorm = 62.5609, GNorm = 1.3772, lr_0 = 1.6833e-04
Loss = 4.4546e-01, PNorm = 62.5609, GNorm = 1.8507, lr_0 = 1.6821e-04
Loss = 3.8549e-01, PNorm = 62.5645, GNorm = 1.2871, lr_0 = 1.6810e-04
Loss = 3.7577e-01, PNorm = 62.5654, GNorm = 1.9189, lr_0 = 1.6798e-04
Loss = 3.3884e-01, PNorm = 62.5652, GNorm = 1.5238, lr_0 = 1.6787e-04
Loss = 3.7272e-01, PNorm = 62.5685, GNorm = 1.3311, lr_0 = 1.6775e-04
Loss = 3.1686e-01, PNorm = 62.5690, GNorm = 1.1823, lr_0 = 1.6764e-04
Loss = 3.7465e-01, PNorm = 62.5696, GNorm = 2.2763, lr_0 = 1.6752e-04
Loss = 3.8463e-01, PNorm = 62.5710, GNorm = 1.8482, lr_0 = 1.6741e-04
Loss = 3.2175e-01, PNorm = 62.5723, GNorm = 1.5172, lr_0 = 1.6729e-04
Loss = 3.3866e-01, PNorm = 62.5739, GNorm = 1.2744, lr_0 = 1.6718e-04
Loss = 4.2290e-01, PNorm = 62.5750, GNorm = 1.5735, lr_0 = 1.6707e-04
Loss = 3.6304e-01, PNorm = 62.5773, GNorm = 1.1609, lr_0 = 1.6695e-04
Loss = 3.5422e-01, PNorm = 62.5797, GNorm = 1.7944, lr_0 = 1.6684e-04
Loss = 3.6990e-01, PNorm = 62.5795, GNorm = 1.5228, lr_0 = 1.6672e-04
Loss = 3.8546e-01, PNorm = 62.5790, GNorm = 1.1199, lr_0 = 1.6661e-04
Loss = 3.1608e-01, PNorm = 62.5796, GNorm = 1.3193, lr_0 = 1.6649e-04
Loss = 3.4173e-01, PNorm = 62.5815, GNorm = 1.5270, lr_0 = 1.6638e-04
Loss = 3.6655e-01, PNorm = 62.5828, GNorm = 1.7265, lr_0 = 1.6627e-04
Loss = 3.4634e-01, PNorm = 62.5834, GNorm = 2.0971, lr_0 = 1.6615e-04
Loss = 3.3220e-01, PNorm = 62.5874, GNorm = 1.0768, lr_0 = 1.6604e-04
Loss = 3.6184e-01, PNorm = 62.5897, GNorm = 1.1237, lr_0 = 1.6592e-04
Loss = 4.0507e-01, PNorm = 62.5927, GNorm = 1.7578, lr_0 = 1.6581e-04
Loss = 3.9482e-01, PNorm = 62.5947, GNorm = 1.7941, lr_0 = 1.6570e-04
Loss = 3.9404e-01, PNorm = 62.5950, GNorm = 1.5106, lr_0 = 1.6558e-04
Loss = 3.2998e-01, PNorm = 62.5967, GNorm = 1.3924, lr_0 = 1.6547e-04
Loss = 3.7648e-01, PNorm = 62.6006, GNorm = 1.9793, lr_0 = 1.6536e-04
Loss = 3.6778e-01, PNorm = 62.6029, GNorm = 1.2667, lr_0 = 1.6524e-04
Loss = 3.5076e-01, PNorm = 62.6052, GNorm = 1.5848, lr_0 = 1.6513e-04
Loss = 3.6906e-01, PNorm = 62.6071, GNorm = 1.6175, lr_0 = 1.6502e-04
Loss = 3.7755e-01, PNorm = 62.6102, GNorm = 1.6100, lr_0 = 1.6490e-04
Loss = 3.4838e-01, PNorm = 62.6135, GNorm = 1.3851, lr_0 = 1.6479e-04
Loss = 3.5161e-01, PNorm = 62.6152, GNorm = 1.7188, lr_0 = 1.6468e-04
Loss = 3.6496e-01, PNorm = 62.6156, GNorm = 1.4484, lr_0 = 1.6457e-04
Loss = 3.5787e-01, PNorm = 62.6169, GNorm = 1.5974, lr_0 = 1.6445e-04
Loss = 3.5469e-01, PNorm = 62.6214, GNorm = 1.4566, lr_0 = 1.6434e-04
Loss = 3.7417e-01, PNorm = 62.6227, GNorm = 1.8559, lr_0 = 1.6423e-04
Loss = 3.4629e-01, PNorm = 62.6241, GNorm = 1.6064, lr_0 = 1.6412e-04
Loss = 3.3061e-01, PNorm = 62.6246, GNorm = 1.9324, lr_0 = 1.6400e-04
Loss = 3.2973e-01, PNorm = 62.6244, GNorm = 1.4916, lr_0 = 1.6389e-04
Loss = 3.4264e-01, PNorm = 62.6267, GNorm = 1.9032, lr_0 = 1.6378e-04
Validation mae = 0.111134
Epoch 24
Loss = 3.3884e-01, PNorm = 62.6285, GNorm = 1.4276, lr_0 = 1.6367e-04
Loss = 3.4232e-01, PNorm = 62.6298, GNorm = 1.1511, lr_0 = 1.6355e-04
Loss = 3.3128e-01, PNorm = 62.6332, GNorm = 1.9863, lr_0 = 1.6344e-04
Loss = 3.5311e-01, PNorm = 62.6338, GNorm = 1.4056, lr_0 = 1.6333e-04
Loss = 3.5366e-01, PNorm = 62.6352, GNorm = 1.1124, lr_0 = 1.6322e-04
Loss = 3.8733e-01, PNorm = 62.6364, GNorm = 2.1692, lr_0 = 1.6311e-04
Loss = 3.6865e-01, PNorm = 62.6368, GNorm = 1.9067, lr_0 = 1.6299e-04
Loss = 3.4349e-01, PNorm = 62.6395, GNorm = 1.6101, lr_0 = 1.6288e-04
Loss = 3.4253e-01, PNorm = 62.6390, GNorm = 1.9605, lr_0 = 1.6277e-04
Loss = 3.3911e-01, PNorm = 62.6382, GNorm = 1.3137, lr_0 = 1.6266e-04
Loss = 3.8453e-01, PNorm = 62.6415, GNorm = 1.5104, lr_0 = 1.6255e-04
Loss = 2.8620e-01, PNorm = 62.6434, GNorm = 1.4064, lr_0 = 1.6244e-04
Loss = 3.2615e-01, PNorm = 62.6436, GNorm = 1.6571, lr_0 = 1.6233e-04
Loss = 3.1014e-01, PNorm = 62.6463, GNorm = 1.3278, lr_0 = 1.6221e-04
Loss = 3.4945e-01, PNorm = 62.6473, GNorm = 1.2630, lr_0 = 1.6210e-04
Loss = 3.9130e-01, PNorm = 62.6488, GNorm = 1.5853, lr_0 = 1.6199e-04
Loss = 3.4160e-01, PNorm = 62.6495, GNorm = 1.3868, lr_0 = 1.6188e-04
Loss = 3.6206e-01, PNorm = 62.6507, GNorm = 1.3100, lr_0 = 1.6177e-04
Loss = 3.4118e-01, PNorm = 62.6522, GNorm = 1.7240, lr_0 = 1.6166e-04
Loss = 3.7428e-01, PNorm = 62.6499, GNorm = 1.4386, lr_0 = 1.6155e-04
Loss = 3.3290e-01, PNorm = 62.6527, GNorm = 1.3900, lr_0 = 1.6144e-04
Loss = 2.9771e-01, PNorm = 62.6549, GNorm = 1.3149, lr_0 = 1.6133e-04
Loss = 3.3291e-01, PNorm = 62.6568, GNorm = 1.3078, lr_0 = 1.6122e-04
Loss = 3.6252e-01, PNorm = 62.6583, GNorm = 1.2610, lr_0 = 1.6111e-04
Loss = 3.2462e-01, PNorm = 62.6588, GNorm = 1.3103, lr_0 = 1.6100e-04
Loss = 3.2547e-01, PNorm = 62.6600, GNorm = 1.3571, lr_0 = 1.6089e-04
Loss = 3.8631e-01, PNorm = 62.6624, GNorm = 1.9409, lr_0 = 1.6078e-04
Loss = 3.3796e-01, PNorm = 62.6658, GNorm = 1.4041, lr_0 = 1.6067e-04
Loss = 3.2575e-01, PNorm = 62.6674, GNorm = 1.1773, lr_0 = 1.6056e-04
Loss = 3.5195e-01, PNorm = 62.6680, GNorm = 2.4185, lr_0 = 1.6045e-04
Loss = 3.7338e-01, PNorm = 62.6702, GNorm = 1.3248, lr_0 = 1.6034e-04
Loss = 3.7338e-01, PNorm = 62.6738, GNorm = 1.8149, lr_0 = 1.6023e-04
Loss = 3.4912e-01, PNorm = 62.6756, GNorm = 1.1932, lr_0 = 1.6012e-04
Loss = 3.0838e-01, PNorm = 62.6763, GNorm = 1.2428, lr_0 = 1.6001e-04
Loss = 3.9478e-01, PNorm = 62.6780, GNorm = 1.5685, lr_0 = 1.5990e-04
Loss = 3.4147e-01, PNorm = 62.6789, GNorm = 2.1040, lr_0 = 1.5979e-04
Loss = 3.5768e-01, PNorm = 62.6809, GNorm = 1.7787, lr_0 = 1.5968e-04
Loss = 3.1918e-01, PNorm = 62.6842, GNorm = 1.2072, lr_0 = 1.5957e-04
Loss = 3.3084e-01, PNorm = 62.6842, GNorm = 2.3450, lr_0 = 1.5946e-04
Loss = 3.2524e-01, PNorm = 62.6853, GNorm = 1.6208, lr_0 = 1.5935e-04
Loss = 4.1206e-01, PNorm = 62.6874, GNorm = 1.7504, lr_0 = 1.5924e-04
Loss = 3.5851e-01, PNorm = 62.6893, GNorm = 1.9929, lr_0 = 1.5913e-04
Loss = 3.9025e-01, PNorm = 62.6897, GNorm = 1.9387, lr_0 = 1.5902e-04
Loss = 3.7113e-01, PNorm = 62.6913, GNorm = 1.5942, lr_0 = 1.5891e-04
Loss = 3.7821e-01, PNorm = 62.6921, GNorm = 1.6792, lr_0 = 1.5880e-04
Loss = 3.6412e-01, PNorm = 62.6937, GNorm = 1.1512, lr_0 = 1.5870e-04
Loss = 3.3993e-01, PNorm = 62.6948, GNorm = 1.5373, lr_0 = 1.5859e-04
Loss = 3.2510e-01, PNorm = 62.6947, GNorm = 1.6052, lr_0 = 1.5848e-04
Loss = 3.6281e-01, PNorm = 62.6955, GNorm = 1.2653, lr_0 = 1.5837e-04
Loss = 3.3761e-01, PNorm = 62.6965, GNorm = 2.0133, lr_0 = 1.5826e-04
Loss = 3.6619e-01, PNorm = 62.6948, GNorm = 1.1708, lr_0 = 1.5815e-04
Loss = 3.2208e-01, PNorm = 62.6955, GNorm = 1.9926, lr_0 = 1.5804e-04
Loss = 3.0659e-01, PNorm = 62.6994, GNorm = 1.0019, lr_0 = 1.5794e-04
Loss = 3.9601e-01, PNorm = 62.7025, GNorm = 1.3800, lr_0 = 1.5783e-04
Loss = 3.7602e-01, PNorm = 62.7037, GNorm = 1.9582, lr_0 = 1.5772e-04
Loss = 3.4976e-01, PNorm = 62.7058, GNorm = 1.2966, lr_0 = 1.5761e-04
Loss = 4.1906e-01, PNorm = 62.7085, GNorm = 2.4721, lr_0 = 1.5750e-04
Loss = 3.4559e-01, PNorm = 62.7084, GNorm = 1.5156, lr_0 = 1.5740e-04
Loss = 3.6296e-01, PNorm = 62.7088, GNorm = 1.6980, lr_0 = 1.5729e-04
Loss = 3.5564e-01, PNorm = 62.7109, GNorm = 1.5355, lr_0 = 1.5718e-04
Loss = 3.6840e-01, PNorm = 62.7125, GNorm = 2.0845, lr_0 = 1.5707e-04
Loss = 3.5576e-01, PNorm = 62.7150, GNorm = 1.2927, lr_0 = 1.5697e-04
Loss = 3.2958e-01, PNorm = 62.7166, GNorm = 1.6233, lr_0 = 1.5686e-04
Loss = 3.0833e-01, PNorm = 62.7178, GNorm = 1.3389, lr_0 = 1.5675e-04
Loss = 3.3482e-01, PNorm = 62.7187, GNorm = 1.9519, lr_0 = 1.5664e-04
Loss = 3.2394e-01, PNorm = 62.7212, GNorm = 1.4376, lr_0 = 1.5654e-04
Loss = 3.2445e-01, PNorm = 62.7244, GNorm = 1.4300, lr_0 = 1.5643e-04
Loss = 3.2328e-01, PNorm = 62.7262, GNorm = 1.2858, lr_0 = 1.5632e-04
Loss = 2.9319e-01, PNorm = 62.7285, GNorm = 1.5812, lr_0 = 1.5621e-04
Loss = 3.1333e-01, PNorm = 62.7329, GNorm = 1.3070, lr_0 = 1.5611e-04
Loss = 3.4591e-01, PNorm = 62.7347, GNorm = 1.5038, lr_0 = 1.5600e-04
Loss = 3.8539e-01, PNorm = 62.7347, GNorm = 1.7752, lr_0 = 1.5589e-04
Loss = 3.4691e-01, PNorm = 62.7384, GNorm = 1.2524, lr_0 = 1.5579e-04
Loss = 3.7219e-01, PNorm = 62.7390, GNorm = 1.4363, lr_0 = 1.5568e-04
Loss = 3.1756e-01, PNorm = 62.7401, GNorm = 1.6854, lr_0 = 1.5557e-04
Loss = 3.6019e-01, PNorm = 62.7411, GNorm = 1.3387, lr_0 = 1.5547e-04
Loss = 3.9321e-01, PNorm = 62.7432, GNorm = 2.1150, lr_0 = 1.5536e-04
Loss = 3.7163e-01, PNorm = 62.7473, GNorm = 1.4956, lr_0 = 1.5525e-04
Loss = 4.2914e-01, PNorm = 62.7489, GNorm = 1.4802, lr_0 = 1.5515e-04
Loss = 3.4913e-01, PNorm = 62.7491, GNorm = 1.5197, lr_0 = 1.5504e-04
Loss = 3.6931e-01, PNorm = 62.7518, GNorm = 1.4637, lr_0 = 1.5493e-04
Loss = 3.9942e-01, PNorm = 62.7523, GNorm = 1.4690, lr_0 = 1.5483e-04
Loss = 3.3833e-01, PNorm = 62.7539, GNorm = 1.6803, lr_0 = 1.5472e-04
Loss = 3.3244e-01, PNorm = 62.7548, GNorm = 1.3221, lr_0 = 1.5462e-04
Loss = 3.3746e-01, PNorm = 62.7556, GNorm = 1.4346, lr_0 = 1.5451e-04
Loss = 3.5489e-01, PNorm = 62.7563, GNorm = 1.4114, lr_0 = 1.5440e-04
Loss = 3.1390e-01, PNorm = 62.7590, GNorm = 1.4316, lr_0 = 1.5430e-04
Loss = 3.2391e-01, PNorm = 62.7603, GNorm = 0.9582, lr_0 = 1.5419e-04
Loss = 3.2236e-01, PNorm = 62.7623, GNorm = 1.3322, lr_0 = 1.5409e-04
Loss = 3.7190e-01, PNorm = 62.7640, GNorm = 1.4923, lr_0 = 1.5398e-04
Loss = 3.9274e-01, PNorm = 62.7655, GNorm = 1.6500, lr_0 = 1.5388e-04
Loss = 3.5470e-01, PNorm = 62.7683, GNorm = 1.8269, lr_0 = 1.5377e-04
Loss = 3.5593e-01, PNorm = 62.7704, GNorm = 1.5199, lr_0 = 1.5367e-04
Loss = 3.5037e-01, PNorm = 62.7709, GNorm = 1.8404, lr_0 = 1.5356e-04
Loss = 3.6604e-01, PNorm = 62.7709, GNorm = 1.5318, lr_0 = 1.5346e-04
Loss = 3.4504e-01, PNorm = 62.7727, GNorm = 1.7817, lr_0 = 1.5335e-04
Loss = 3.4663e-01, PNorm = 62.7731, GNorm = 1.1407, lr_0 = 1.5325e-04
Loss = 3.5984e-01, PNorm = 62.7749, GNorm = 1.5195, lr_0 = 1.5314e-04
Loss = 3.4973e-01, PNorm = 62.7772, GNorm = 1.4414, lr_0 = 1.5304e-04
Loss = 3.5639e-01, PNorm = 62.7771, GNorm = 1.4026, lr_0 = 1.5293e-04
Loss = 3.4464e-01, PNorm = 62.7782, GNorm = 2.5955, lr_0 = 1.5283e-04
Loss = 3.3405e-01, PNorm = 62.7808, GNorm = 1.7552, lr_0 = 1.5272e-04
Loss = 3.3657e-01, PNorm = 62.7821, GNorm = 2.1084, lr_0 = 1.5262e-04
Loss = 3.8318e-01, PNorm = 62.7853, GNorm = 1.4991, lr_0 = 1.5251e-04
Loss = 4.4885e-01, PNorm = 62.7847, GNorm = 1.8477, lr_0 = 1.5241e-04
Loss = 3.6901e-01, PNorm = 62.7851, GNorm = 1.9120, lr_0 = 1.5230e-04
Loss = 3.7894e-01, PNorm = 62.7856, GNorm = 1.0939, lr_0 = 1.5220e-04
Loss = 3.3314e-01, PNorm = 62.7859, GNorm = 1.7660, lr_0 = 1.5209e-04
Loss = 3.4044e-01, PNorm = 62.7871, GNorm = 2.0996, lr_0 = 1.5199e-04
Loss = 3.6399e-01, PNorm = 62.7891, GNorm = 1.6015, lr_0 = 1.5189e-04
Loss = 3.2687e-01, PNorm = 62.7887, GNorm = 2.0087, lr_0 = 1.5178e-04
Loss = 3.4058e-01, PNorm = 62.7893, GNorm = 1.3320, lr_0 = 1.5168e-04
Loss = 3.6021e-01, PNorm = 62.7912, GNorm = 1.5072, lr_0 = 1.5157e-04
Loss = 3.5230e-01, PNorm = 62.7918, GNorm = 1.2276, lr_0 = 1.5147e-04
Loss = 3.4163e-01, PNorm = 62.7918, GNorm = 1.7912, lr_0 = 1.5137e-04
Loss = 3.9495e-01, PNorm = 62.7952, GNorm = 1.4436, lr_0 = 1.5126e-04
Loss = 3.5405e-01, PNorm = 62.8010, GNorm = 1.5477, lr_0 = 1.5116e-04
Loss = 4.3451e-01, PNorm = 62.8045, GNorm = 1.8938, lr_0 = 1.5106e-04
Loss = 3.3525e-01, PNorm = 62.8049, GNorm = 1.9700, lr_0 = 1.5095e-04
Loss = 3.4856e-01, PNorm = 62.8060, GNorm = 1.3163, lr_0 = 1.5085e-04
Validation mae = 0.111361
Epoch 25
Loss = 3.2622e-01, PNorm = 62.8076, GNorm = 1.2647, lr_0 = 1.5075e-04
Loss = 3.7990e-01, PNorm = 62.8087, GNorm = 1.1448, lr_0 = 1.5064e-04
Loss = 3.2524e-01, PNorm = 62.8088, GNorm = 1.6735, lr_0 = 1.5054e-04
Loss = 3.3934e-01, PNorm = 62.8071, GNorm = 1.4369, lr_0 = 1.5044e-04
Loss = 3.4253e-01, PNorm = 62.8087, GNorm = 1.3614, lr_0 = 1.5033e-04
Loss = 3.2750e-01, PNorm = 62.8088, GNorm = 1.6187, lr_0 = 1.5023e-04
Loss = 3.3848e-01, PNorm = 62.8080, GNorm = 1.2623, lr_0 = 1.5013e-04
Loss = 3.4752e-01, PNorm = 62.8085, GNorm = 1.4655, lr_0 = 1.5002e-04
Loss = 3.3456e-01, PNorm = 62.8093, GNorm = 1.6692, lr_0 = 1.4992e-04
Loss = 3.6466e-01, PNorm = 62.8116, GNorm = 1.3002, lr_0 = 1.4982e-04
Loss = 3.4235e-01, PNorm = 62.8139, GNorm = 1.4800, lr_0 = 1.4972e-04
Loss = 3.8035e-01, PNorm = 62.8140, GNorm = 2.1226, lr_0 = 1.4961e-04
Loss = 4.1096e-01, PNorm = 62.8133, GNorm = 2.7130, lr_0 = 1.4951e-04
Loss = 4.0220e-01, PNorm = 62.8152, GNorm = 1.6810, lr_0 = 1.4941e-04
Loss = 3.3097e-01, PNorm = 62.8176, GNorm = 1.3661, lr_0 = 1.4931e-04
Loss = 3.1370e-01, PNorm = 62.8183, GNorm = 1.1431, lr_0 = 1.4920e-04
Loss = 3.5537e-01, PNorm = 62.8197, GNorm = 1.4735, lr_0 = 1.4910e-04
Loss = 3.5463e-01, PNorm = 62.8225, GNorm = 1.5065, lr_0 = 1.4900e-04
Loss = 3.2262e-01, PNorm = 62.8240, GNorm = 1.6034, lr_0 = 1.4890e-04
Loss = 3.4604e-01, PNorm = 62.8240, GNorm = 1.4519, lr_0 = 1.4880e-04
Loss = 3.7385e-01, PNorm = 62.8233, GNorm = 2.1359, lr_0 = 1.4869e-04
Loss = 4.0270e-01, PNorm = 62.8257, GNorm = 1.7112, lr_0 = 1.4859e-04
Loss = 3.7810e-01, PNorm = 62.8278, GNorm = 2.4801, lr_0 = 1.4849e-04
Loss = 3.1191e-01, PNorm = 62.8287, GNorm = 1.6438, lr_0 = 1.4839e-04
Loss = 3.3931e-01, PNorm = 62.8306, GNorm = 1.3977, lr_0 = 1.4829e-04
Loss = 3.5586e-01, PNorm = 62.8334, GNorm = 1.3998, lr_0 = 1.4818e-04
Loss = 3.0911e-01, PNorm = 62.8347, GNorm = 1.2370, lr_0 = 1.4808e-04
Loss = 3.5047e-01, PNorm = 62.8338, GNorm = 1.6186, lr_0 = 1.4798e-04
Loss = 3.3848e-01, PNorm = 62.8349, GNorm = 1.2921, lr_0 = 1.4788e-04
Loss = 3.2513e-01, PNorm = 62.8382, GNorm = 1.4840, lr_0 = 1.4778e-04
Loss = 3.5022e-01, PNorm = 62.8406, GNorm = 1.7505, lr_0 = 1.4768e-04
Loss = 3.5480e-01, PNorm = 62.8423, GNorm = 1.2350, lr_0 = 1.4758e-04
Loss = 3.4412e-01, PNorm = 62.8441, GNorm = 2.6324, lr_0 = 1.4748e-04
Loss = 3.9097e-01, PNorm = 62.8440, GNorm = 1.6915, lr_0 = 1.4737e-04
Loss = 3.9337e-01, PNorm = 62.8452, GNorm = 1.5776, lr_0 = 1.4727e-04
Loss = 3.9168e-01, PNorm = 62.8491, GNorm = 1.9883, lr_0 = 1.4717e-04
Loss = 3.6913e-01, PNorm = 62.8494, GNorm = 1.5235, lr_0 = 1.4707e-04
Loss = 3.1994e-01, PNorm = 62.8483, GNorm = 1.1192, lr_0 = 1.4697e-04
Loss = 3.7627e-01, PNorm = 62.8495, GNorm = 1.5337, lr_0 = 1.4687e-04
Loss = 3.3164e-01, PNorm = 62.8528, GNorm = 1.3102, lr_0 = 1.4677e-04
Loss = 3.5305e-01, PNorm = 62.8552, GNorm = 3.2341, lr_0 = 1.4667e-04
Loss = 3.3190e-01, PNorm = 62.8558, GNorm = 1.9794, lr_0 = 1.4657e-04
Loss = 3.6210e-01, PNorm = 62.8574, GNorm = 1.2314, lr_0 = 1.4647e-04
Loss = 4.2382e-01, PNorm = 62.8588, GNorm = 1.6475, lr_0 = 1.4637e-04
Loss = 3.6545e-01, PNorm = 62.8604, GNorm = 1.5468, lr_0 = 1.4627e-04
Loss = 2.8730e-01, PNorm = 62.8628, GNorm = 1.3607, lr_0 = 1.4617e-04
Loss = 3.4609e-01, PNorm = 62.8635, GNorm = 1.1514, lr_0 = 1.4607e-04
Loss = 3.1965e-01, PNorm = 62.8640, GNorm = 0.7690, lr_0 = 1.4597e-04
Loss = 3.3805e-01, PNorm = 62.8648, GNorm = 1.4247, lr_0 = 1.4587e-04
Loss = 3.4874e-01, PNorm = 62.8645, GNorm = 1.6582, lr_0 = 1.4577e-04
Loss = 3.7999e-01, PNorm = 62.8652, GNorm = 1.4584, lr_0 = 1.4567e-04
Loss = 3.6186e-01, PNorm = 62.8673, GNorm = 1.2353, lr_0 = 1.4557e-04
Loss = 3.8711e-01, PNorm = 62.8706, GNorm = 1.4830, lr_0 = 1.4547e-04
Loss = 3.3331e-01, PNorm = 62.8731, GNorm = 1.4583, lr_0 = 1.4537e-04
Loss = 3.6732e-01, PNorm = 62.8751, GNorm = 1.5968, lr_0 = 1.4527e-04
Loss = 3.2960e-01, PNorm = 62.8750, GNorm = 1.3142, lr_0 = 1.4517e-04
Loss = 2.7225e-01, PNorm = 62.8758, GNorm = 1.3043, lr_0 = 1.4507e-04
Loss = 3.2630e-01, PNorm = 62.8758, GNorm = 1.2804, lr_0 = 1.4497e-04
Loss = 4.1202e-01, PNorm = 62.8765, GNorm = 1.3927, lr_0 = 1.4487e-04
Loss = 3.4392e-01, PNorm = 62.8819, GNorm = 1.2948, lr_0 = 1.4477e-04
Loss = 3.8636e-01, PNorm = 62.8847, GNorm = 1.7556, lr_0 = 1.4467e-04
Loss = 3.5475e-01, PNorm = 62.8874, GNorm = 1.8415, lr_0 = 1.4457e-04
Loss = 3.5573e-01, PNorm = 62.8883, GNorm = 1.1210, lr_0 = 1.4447e-04
Loss = 3.4646e-01, PNorm = 62.8897, GNorm = 2.0095, lr_0 = 1.4438e-04
Loss = 3.3963e-01, PNorm = 62.8903, GNorm = 1.3244, lr_0 = 1.4428e-04
Loss = 3.1835e-01, PNorm = 62.8931, GNorm = 1.3752, lr_0 = 1.4418e-04
Loss = 3.2522e-01, PNorm = 62.8944, GNorm = 1.4125, lr_0 = 1.4408e-04
Loss = 3.1430e-01, PNorm = 62.8937, GNorm = 2.4587, lr_0 = 1.4398e-04
Loss = 3.6599e-01, PNorm = 62.8959, GNorm = 1.6057, lr_0 = 1.4388e-04
Loss = 3.5783e-01, PNorm = 62.8972, GNorm = 1.2563, lr_0 = 1.4378e-04
Loss = 3.6686e-01, PNorm = 62.8978, GNorm = 1.9269, lr_0 = 1.4368e-04
Loss = 2.9831e-01, PNorm = 62.9005, GNorm = 1.0988, lr_0 = 1.4359e-04
Loss = 3.4591e-01, PNorm = 62.9014, GNorm = 1.7663, lr_0 = 1.4349e-04
Loss = 3.8116e-01, PNorm = 62.9016, GNorm = 2.0302, lr_0 = 1.4339e-04
Loss = 4.2777e-01, PNorm = 62.9003, GNorm = 1.4333, lr_0 = 1.4329e-04
Loss = 3.4578e-01, PNorm = 62.9019, GNorm = 1.4267, lr_0 = 1.4319e-04
Loss = 3.7107e-01, PNorm = 62.9034, GNorm = 1.5006, lr_0 = 1.4310e-04
Loss = 3.7115e-01, PNorm = 62.9040, GNorm = 1.1777, lr_0 = 1.4300e-04
Loss = 3.4830e-01, PNorm = 62.9065, GNorm = 1.0920, lr_0 = 1.4290e-04
Loss = 2.9070e-01, PNorm = 62.9098, GNorm = 1.4360, lr_0 = 1.4280e-04
Loss = 3.4903e-01, PNorm = 62.9117, GNorm = 1.4655, lr_0 = 1.4270e-04
Loss = 3.5269e-01, PNorm = 62.9100, GNorm = 1.6732, lr_0 = 1.4261e-04
Loss = 2.9617e-01, PNorm = 62.9100, GNorm = 1.3072, lr_0 = 1.4251e-04
Loss = 3.1537e-01, PNorm = 62.9106, GNorm = 1.3049, lr_0 = 1.4241e-04
Loss = 3.2567e-01, PNorm = 62.9117, GNorm = 1.7967, lr_0 = 1.4231e-04
Loss = 3.8209e-01, PNorm = 62.9117, GNorm = 1.1312, lr_0 = 1.4222e-04
Loss = 3.6047e-01, PNorm = 62.9138, GNorm = 1.0836, lr_0 = 1.4212e-04
Loss = 3.2642e-01, PNorm = 62.9176, GNorm = 1.3108, lr_0 = 1.4202e-04
Loss = 3.3770e-01, PNorm = 62.9203, GNorm = 1.5206, lr_0 = 1.4192e-04
Loss = 3.9863e-01, PNorm = 62.9213, GNorm = 1.4442, lr_0 = 1.4183e-04
Loss = 3.1667e-01, PNorm = 62.9221, GNorm = 1.3071, lr_0 = 1.4173e-04
Loss = 3.3257e-01, PNorm = 62.9216, GNorm = 1.2485, lr_0 = 1.4163e-04
Loss = 3.6192e-01, PNorm = 62.9210, GNorm = 1.5175, lr_0 = 1.4153e-04
Loss = 3.1706e-01, PNorm = 62.9205, GNorm = 0.9195, lr_0 = 1.4144e-04
Loss = 4.0474e-01, PNorm = 62.9221, GNorm = 2.3738, lr_0 = 1.4134e-04
Loss = 3.5640e-01, PNorm = 62.9256, GNorm = 1.6057, lr_0 = 1.4124e-04
Loss = 3.2431e-01, PNorm = 62.9282, GNorm = 1.5968, lr_0 = 1.4115e-04
Loss = 3.1768e-01, PNorm = 62.9286, GNorm = 1.3766, lr_0 = 1.4105e-04
Loss = 3.5278e-01, PNorm = 62.9276, GNorm = 1.6598, lr_0 = 1.4095e-04
Loss = 3.1275e-01, PNorm = 62.9281, GNorm = 1.3519, lr_0 = 1.4086e-04
Loss = 3.7084e-01, PNorm = 62.9284, GNorm = 0.9645, lr_0 = 1.4076e-04
Loss = 3.4081e-01, PNorm = 62.9286, GNorm = 1.2069, lr_0 = 1.4066e-04
Loss = 3.5557e-01, PNorm = 62.9305, GNorm = 1.3323, lr_0 = 1.4057e-04
Loss = 3.1097e-01, PNorm = 62.9335, GNorm = 1.4510, lr_0 = 1.4047e-04
Loss = 3.4719e-01, PNorm = 62.9328, GNorm = 1.8316, lr_0 = 1.4038e-04
Loss = 3.6328e-01, PNorm = 62.9337, GNorm = 1.4954, lr_0 = 1.4028e-04
Loss = 3.5090e-01, PNorm = 62.9335, GNorm = 1.5382, lr_0 = 1.4018e-04
Loss = 4.1125e-01, PNorm = 62.9336, GNorm = 1.9369, lr_0 = 1.4009e-04
Loss = 3.4531e-01, PNorm = 62.9350, GNorm = 0.8458, lr_0 = 1.3999e-04
Loss = 3.2880e-01, PNorm = 62.9356, GNorm = 2.1816, lr_0 = 1.3990e-04
Loss = 3.7131e-01, PNorm = 62.9348, GNorm = 1.4000, lr_0 = 1.3980e-04
Loss = 3.5891e-01, PNorm = 62.9359, GNorm = 1.0595, lr_0 = 1.3970e-04
Loss = 3.6497e-01, PNorm = 62.9373, GNorm = 1.2517, lr_0 = 1.3961e-04
Loss = 3.4324e-01, PNorm = 62.9384, GNorm = 1.7936, lr_0 = 1.3951e-04
Loss = 3.8834e-01, PNorm = 62.9380, GNorm = 2.0951, lr_0 = 1.3942e-04
Loss = 3.1114e-01, PNorm = 62.9386, GNorm = 1.3925, lr_0 = 1.3932e-04
Loss = 3.3475e-01, PNorm = 62.9419, GNorm = 1.3319, lr_0 = 1.3923e-04
Loss = 3.6034e-01, PNorm = 62.9413, GNorm = 1.3431, lr_0 = 1.3913e-04
Loss = 3.8378e-01, PNorm = 62.9437, GNorm = 2.1293, lr_0 = 1.3904e-04
Loss = 3.5532e-01, PNorm = 62.9470, GNorm = 1.5371, lr_0 = 1.3894e-04
Validation mae = 0.110650
Epoch 26
Loss = 3.1611e-01, PNorm = 62.9501, GNorm = 1.3490, lr_0 = 1.3884e-04
Loss = 3.4068e-01, PNorm = 62.9517, GNorm = 1.4289, lr_0 = 1.3875e-04
Loss = 3.5371e-01, PNorm = 62.9535, GNorm = 1.4582, lr_0 = 1.3865e-04
Loss = 3.5662e-01, PNorm = 62.9541, GNorm = 1.2957, lr_0 = 1.3856e-04
Loss = 3.5304e-01, PNorm = 62.9555, GNorm = 1.4808, lr_0 = 1.3846e-04
Loss = 3.3799e-01, PNorm = 62.9574, GNorm = 1.0979, lr_0 = 1.3837e-04
Loss = 3.1552e-01, PNorm = 62.9580, GNorm = 1.0078, lr_0 = 1.3828e-04
Loss = 3.4669e-01, PNorm = 62.9569, GNorm = 1.2700, lr_0 = 1.3818e-04
Loss = 3.3721e-01, PNorm = 62.9576, GNorm = 1.7264, lr_0 = 1.3809e-04
Loss = 3.0580e-01, PNorm = 62.9590, GNorm = 1.2153, lr_0 = 1.3799e-04
Loss = 4.1991e-01, PNorm = 62.9595, GNorm = 1.9859, lr_0 = 1.3790e-04
Loss = 3.2718e-01, PNorm = 62.9584, GNorm = 1.9618, lr_0 = 1.3780e-04
Loss = 2.9732e-01, PNorm = 62.9582, GNorm = 1.6610, lr_0 = 1.3771e-04
Loss = 3.3183e-01, PNorm = 62.9598, GNorm = 1.5409, lr_0 = 1.3761e-04
Loss = 3.1281e-01, PNorm = 62.9602, GNorm = 1.0087, lr_0 = 1.3752e-04
Loss = 3.7659e-01, PNorm = 62.9610, GNorm = 1.3811, lr_0 = 1.3742e-04
Loss = 3.1865e-01, PNorm = 62.9629, GNorm = 1.3775, lr_0 = 1.3733e-04
Loss = 3.8268e-01, PNorm = 62.9639, GNorm = 1.7266, lr_0 = 1.3724e-04
Loss = 3.7110e-01, PNorm = 62.9636, GNorm = 1.4764, lr_0 = 1.3714e-04
Loss = 3.5423e-01, PNorm = 62.9648, GNorm = 1.2094, lr_0 = 1.3705e-04
Loss = 2.9743e-01, PNorm = 62.9656, GNorm = 1.6414, lr_0 = 1.3695e-04
Loss = 3.1570e-01, PNorm = 62.9664, GNorm = 1.2613, lr_0 = 1.3686e-04
Loss = 3.4334e-01, PNorm = 62.9666, GNorm = 1.7594, lr_0 = 1.3677e-04
Loss = 4.0195e-01, PNorm = 62.9678, GNorm = 1.0710, lr_0 = 1.3667e-04
Loss = 2.9961e-01, PNorm = 62.9689, GNorm = 1.8297, lr_0 = 1.3658e-04
Loss = 3.4113e-01, PNorm = 62.9707, GNorm = 1.5220, lr_0 = 1.3649e-04
Loss = 3.6212e-01, PNorm = 62.9706, GNorm = 1.2353, lr_0 = 1.3639e-04
Loss = 3.8605e-01, PNorm = 62.9717, GNorm = 1.3666, lr_0 = 1.3630e-04
Loss = 3.8973e-01, PNorm = 62.9744, GNorm = 1.3816, lr_0 = 1.3621e-04
Loss = 3.3337e-01, PNorm = 62.9745, GNorm = 1.2449, lr_0 = 1.3611e-04
Loss = 3.5768e-01, PNorm = 62.9741, GNorm = 1.2504, lr_0 = 1.3602e-04
Loss = 3.6178e-01, PNorm = 62.9746, GNorm = 1.6434, lr_0 = 1.3593e-04
Loss = 3.8056e-01, PNorm = 62.9755, GNorm = 1.6369, lr_0 = 1.3583e-04
Loss = 3.5965e-01, PNorm = 62.9767, GNorm = 2.2461, lr_0 = 1.3574e-04
Loss = 3.5515e-01, PNorm = 62.9790, GNorm = 1.2181, lr_0 = 1.3565e-04
Loss = 3.4818e-01, PNorm = 62.9797, GNorm = 1.4745, lr_0 = 1.3555e-04
Loss = 3.2157e-01, PNorm = 62.9804, GNorm = 1.9183, lr_0 = 1.3546e-04
Loss = 3.5254e-01, PNorm = 62.9813, GNorm = 1.7156, lr_0 = 1.3537e-04
Loss = 3.8854e-01, PNorm = 62.9825, GNorm = 1.7298, lr_0 = 1.3528e-04
Loss = 3.6919e-01, PNorm = 62.9856, GNorm = 1.6373, lr_0 = 1.3518e-04
Loss = 3.4607e-01, PNorm = 62.9848, GNorm = 1.5528, lr_0 = 1.3509e-04
Loss = 3.5393e-01, PNorm = 62.9836, GNorm = 1.3842, lr_0 = 1.3500e-04
Loss = 3.3122e-01, PNorm = 62.9858, GNorm = 1.1629, lr_0 = 1.3491e-04
Loss = 3.6091e-01, PNorm = 62.9891, GNorm = 1.9155, lr_0 = 1.3481e-04
Loss = 3.7336e-01, PNorm = 62.9909, GNorm = 1.3736, lr_0 = 1.3472e-04
Loss = 3.4789e-01, PNorm = 62.9935, GNorm = 1.4454, lr_0 = 1.3463e-04
Loss = 3.3757e-01, PNorm = 62.9937, GNorm = 1.5905, lr_0 = 1.3454e-04
Loss = 3.4212e-01, PNorm = 62.9928, GNorm = 1.3429, lr_0 = 1.3444e-04
Loss = 3.3747e-01, PNorm = 62.9912, GNorm = 1.8873, lr_0 = 1.3435e-04
Loss = 3.2743e-01, PNorm = 62.9905, GNorm = 1.1283, lr_0 = 1.3426e-04
Loss = 3.2680e-01, PNorm = 62.9906, GNorm = 1.1032, lr_0 = 1.3417e-04
Loss = 3.8671e-01, PNorm = 62.9923, GNorm = 1.4889, lr_0 = 1.3408e-04
Loss = 3.2533e-01, PNorm = 62.9950, GNorm = 1.2744, lr_0 = 1.3398e-04
Loss = 3.1885e-01, PNorm = 62.9976, GNorm = 1.7432, lr_0 = 1.3389e-04
Loss = 3.2888e-01, PNorm = 63.0007, GNorm = 1.4401, lr_0 = 1.3380e-04
Loss = 3.2476e-01, PNorm = 63.0010, GNorm = 1.2934, lr_0 = 1.3371e-04
Loss = 3.1239e-01, PNorm = 63.0010, GNorm = 1.6845, lr_0 = 1.3362e-04
Loss = 2.8124e-01, PNorm = 63.0016, GNorm = 1.3199, lr_0 = 1.3353e-04
Loss = 3.4955e-01, PNorm = 63.0026, GNorm = 1.2622, lr_0 = 1.3343e-04
Loss = 3.4381e-01, PNorm = 63.0053, GNorm = 1.9803, lr_0 = 1.3334e-04
Loss = 3.1303e-01, PNorm = 63.0052, GNorm = 1.0546, lr_0 = 1.3325e-04
Loss = 3.6589e-01, PNorm = 63.0055, GNorm = 1.1859, lr_0 = 1.3316e-04
Loss = 3.6082e-01, PNorm = 63.0060, GNorm = 1.5462, lr_0 = 1.3307e-04
Loss = 3.4841e-01, PNorm = 63.0082, GNorm = 1.4382, lr_0 = 1.3298e-04
Loss = 3.9155e-01, PNorm = 63.0102, GNorm = 1.0374, lr_0 = 1.3289e-04
Loss = 3.8533e-01, PNorm = 63.0125, GNorm = 1.8171, lr_0 = 1.3280e-04
Loss = 3.7177e-01, PNorm = 63.0120, GNorm = 2.3852, lr_0 = 1.3270e-04
Loss = 3.1520e-01, PNorm = 63.0115, GNorm = 1.0060, lr_0 = 1.3261e-04
Loss = 3.4458e-01, PNorm = 63.0127, GNorm = 1.3047, lr_0 = 1.3252e-04
Loss = 3.7811e-01, PNorm = 63.0153, GNorm = 1.9421, lr_0 = 1.3243e-04
Loss = 3.6337e-01, PNorm = 63.0171, GNorm = 1.8276, lr_0 = 1.3234e-04
Loss = 3.1527e-01, PNorm = 63.0178, GNorm = 1.4687, lr_0 = 1.3225e-04
Loss = 2.8884e-01, PNorm = 63.0197, GNorm = 1.0674, lr_0 = 1.3216e-04
Loss = 3.8412e-01, PNorm = 63.0211, GNorm = 1.7280, lr_0 = 1.3207e-04
Loss = 3.4315e-01, PNorm = 63.0234, GNorm = 1.2613, lr_0 = 1.3198e-04
Loss = 3.9781e-01, PNorm = 63.0240, GNorm = 2.2610, lr_0 = 1.3189e-04
Loss = 3.2951e-01, PNorm = 63.0244, GNorm = 1.1106, lr_0 = 1.3180e-04
Loss = 3.2456e-01, PNorm = 63.0270, GNorm = 1.4834, lr_0 = 1.3171e-04
Loss = 3.4757e-01, PNorm = 63.0300, GNorm = 1.5708, lr_0 = 1.3162e-04
Loss = 3.6595e-01, PNorm = 63.0284, GNorm = 1.2915, lr_0 = 1.3153e-04
Loss = 3.5543e-01, PNorm = 63.0296, GNorm = 1.9836, lr_0 = 1.3144e-04
Loss = 4.1697e-01, PNorm = 63.0326, GNorm = 1.4712, lr_0 = 1.3135e-04
Loss = 3.0934e-01, PNorm = 63.0339, GNorm = 1.4830, lr_0 = 1.3126e-04
Loss = 3.1281e-01, PNorm = 63.0358, GNorm = 1.3726, lr_0 = 1.3117e-04
Loss = 3.2380e-01, PNorm = 63.0365, GNorm = 1.8996, lr_0 = 1.3108e-04
Loss = 3.6948e-01, PNorm = 63.0365, GNorm = 1.3773, lr_0 = 1.3099e-04
Loss = 3.6956e-01, PNorm = 63.0386, GNorm = 1.8482, lr_0 = 1.3090e-04
Loss = 3.7158e-01, PNorm = 63.0390, GNorm = 1.2202, lr_0 = 1.3081e-04
Loss = 3.2436e-01, PNorm = 63.0399, GNorm = 1.1102, lr_0 = 1.3072e-04
Loss = 3.7949e-01, PNorm = 63.0416, GNorm = 1.8078, lr_0 = 1.3063e-04
Loss = 3.7622e-01, PNorm = 63.0425, GNorm = 1.4513, lr_0 = 1.3054e-04
Loss = 3.2686e-01, PNorm = 63.0435, GNorm = 1.3631, lr_0 = 1.3045e-04
Loss = 3.4413e-01, PNorm = 63.0425, GNorm = 1.1887, lr_0 = 1.3036e-04
Loss = 3.5336e-01, PNorm = 63.0433, GNorm = 1.5160, lr_0 = 1.3027e-04
Loss = 3.7311e-01, PNorm = 63.0448, GNorm = 1.1921, lr_0 = 1.3018e-04
Loss = 3.4263e-01, PNorm = 63.0456, GNorm = 1.6883, lr_0 = 1.3009e-04
Loss = 3.6409e-01, PNorm = 63.0455, GNorm = 2.3589, lr_0 = 1.3000e-04
Loss = 3.6033e-01, PNorm = 63.0470, GNorm = 2.2179, lr_0 = 1.2992e-04
Loss = 3.3369e-01, PNorm = 63.0513, GNorm = 1.9799, lr_0 = 1.2983e-04
Loss = 3.3781e-01, PNorm = 63.0530, GNorm = 1.2364, lr_0 = 1.2974e-04
Loss = 3.0574e-01, PNorm = 63.0524, GNorm = 1.2515, lr_0 = 1.2965e-04
Loss = 3.9428e-01, PNorm = 63.0520, GNorm = 1.3829, lr_0 = 1.2956e-04
Loss = 3.9937e-01, PNorm = 63.0521, GNorm = 2.0360, lr_0 = 1.2947e-04
Loss = 3.4323e-01, PNorm = 63.0537, GNorm = 1.4139, lr_0 = 1.2938e-04
Loss = 3.5368e-01, PNorm = 63.0551, GNorm = 1.7251, lr_0 = 1.2929e-04
Loss = 3.1968e-01, PNorm = 63.0566, GNorm = 1.0129, lr_0 = 1.2921e-04
Loss = 2.9497e-01, PNorm = 63.0575, GNorm = 1.3613, lr_0 = 1.2912e-04
Loss = 3.1306e-01, PNorm = 63.0573, GNorm = 1.3803, lr_0 = 1.2903e-04
Loss = 4.2506e-01, PNorm = 63.0572, GNorm = 1.9552, lr_0 = 1.2894e-04
Loss = 3.3552e-01, PNorm = 63.0578, GNorm = 1.7465, lr_0 = 1.2885e-04
Loss = 3.9269e-01, PNorm = 63.0591, GNorm = 1.6957, lr_0 = 1.2876e-04
Loss = 3.8313e-01, PNorm = 63.0605, GNorm = 1.6704, lr_0 = 1.2867e-04
Loss = 3.0728e-01, PNorm = 63.0623, GNorm = 1.3806, lr_0 = 1.2859e-04
Loss = 3.6522e-01, PNorm = 63.0623, GNorm = 1.5296, lr_0 = 1.2850e-04
Loss = 3.5938e-01, PNorm = 63.0642, GNorm = 1.3566, lr_0 = 1.2841e-04
Loss = 3.6055e-01, PNorm = 63.0641, GNorm = 1.5264, lr_0 = 1.2832e-04
Loss = 3.3212e-01, PNorm = 63.0641, GNorm = 1.5139, lr_0 = 1.2823e-04
Loss = 3.9657e-01, PNorm = 63.0660, GNorm = 1.5997, lr_0 = 1.2815e-04
Loss = 3.5005e-01, PNorm = 63.0691, GNorm = 1.2208, lr_0 = 1.2806e-04
Loss = 4.2035e-01, PNorm = 63.0706, GNorm = 1.6903, lr_0 = 1.2797e-04
Validation mae = 0.111154
Epoch 27
Loss = 3.2100e-01, PNorm = 63.0720, GNorm = 1.3917, lr_0 = 1.2788e-04
Loss = 3.7888e-01, PNorm = 63.0735, GNorm = 1.4319, lr_0 = 1.2780e-04
Loss = 3.3776e-01, PNorm = 63.0727, GNorm = 1.5019, lr_0 = 1.2771e-04
Loss = 3.5945e-01, PNorm = 63.0741, GNorm = 1.1570, lr_0 = 1.2762e-04
Loss = 3.1100e-01, PNorm = 63.0758, GNorm = 1.2799, lr_0 = 1.2753e-04
Loss = 3.8584e-01, PNorm = 63.0756, GNorm = 1.7571, lr_0 = 1.2745e-04
Loss = 3.4982e-01, PNorm = 63.0770, GNorm = 1.6026, lr_0 = 1.2736e-04
Loss = 3.7970e-01, PNorm = 63.0793, GNorm = 1.6136, lr_0 = 1.2727e-04
Loss = 3.2229e-01, PNorm = 63.0796, GNorm = 1.3434, lr_0 = 1.2718e-04
Loss = 3.0738e-01, PNorm = 63.0825, GNorm = 1.4950, lr_0 = 1.2710e-04
Loss = 3.3794e-01, PNorm = 63.0849, GNorm = 1.7263, lr_0 = 1.2701e-04
Loss = 3.4823e-01, PNorm = 63.0845, GNorm = 1.1391, lr_0 = 1.2692e-04
Loss = 3.1369e-01, PNorm = 63.0857, GNorm = 1.7017, lr_0 = 1.2684e-04
Loss = 3.5526e-01, PNorm = 63.0870, GNorm = 2.2519, lr_0 = 1.2675e-04
Loss = 3.6479e-01, PNorm = 63.0873, GNorm = 1.3241, lr_0 = 1.2666e-04
Loss = 3.2872e-01, PNorm = 63.0887, GNorm = 1.8051, lr_0 = 1.2658e-04
Loss = 3.2298e-01, PNorm = 63.0912, GNorm = 1.5311, lr_0 = 1.2649e-04
Loss = 3.8525e-01, PNorm = 63.0921, GNorm = 1.4780, lr_0 = 1.2640e-04
Loss = 3.4512e-01, PNorm = 63.0934, GNorm = 0.8842, lr_0 = 1.2632e-04
Loss = 2.8877e-01, PNorm = 63.0958, GNorm = 1.0576, lr_0 = 1.2623e-04
Loss = 3.6904e-01, PNorm = 63.0977, GNorm = 2.9257, lr_0 = 1.2614e-04
Loss = 3.2650e-01, PNorm = 63.0979, GNorm = 1.1311, lr_0 = 1.2606e-04
Loss = 3.1078e-01, PNorm = 63.0965, GNorm = 1.8330, lr_0 = 1.2597e-04
Loss = 3.3216e-01, PNorm = 63.0971, GNorm = 1.2986, lr_0 = 1.2588e-04
Loss = 3.6133e-01, PNorm = 63.0985, GNorm = 1.4229, lr_0 = 1.2580e-04
Loss = 3.6496e-01, PNorm = 63.1004, GNorm = 1.8978, lr_0 = 1.2571e-04
Loss = 3.1607e-01, PNorm = 63.1042, GNorm = 1.8291, lr_0 = 1.2563e-04
Loss = 3.5836e-01, PNorm = 63.1053, GNorm = 1.2694, lr_0 = 1.2554e-04
Loss = 4.2552e-01, PNorm = 63.1048, GNorm = 1.4220, lr_0 = 1.2545e-04
Loss = 3.5367e-01, PNorm = 63.1065, GNorm = 1.5393, lr_0 = 1.2537e-04
Loss = 3.2108e-01, PNorm = 63.1088, GNorm = 2.2860, lr_0 = 1.2528e-04
Loss = 3.2509e-01, PNorm = 63.1091, GNorm = 1.4111, lr_0 = 1.2520e-04
Loss = 3.3677e-01, PNorm = 63.1105, GNorm = 1.3818, lr_0 = 1.2511e-04
Loss = 3.2150e-01, PNorm = 63.1122, GNorm = 2.1459, lr_0 = 1.2502e-04
Loss = 3.2709e-01, PNorm = 63.1149, GNorm = 1.4029, lr_0 = 1.2494e-04
Loss = 3.2283e-01, PNorm = 63.1161, GNorm = 0.9861, lr_0 = 1.2485e-04
Loss = 3.8838e-01, PNorm = 63.1162, GNorm = 1.7008, lr_0 = 1.2477e-04
Loss = 3.1304e-01, PNorm = 63.1186, GNorm = 1.4648, lr_0 = 1.2468e-04
Loss = 3.0778e-01, PNorm = 63.1207, GNorm = 1.0862, lr_0 = 1.2460e-04
Loss = 3.8939e-01, PNorm = 63.1211, GNorm = 1.6485, lr_0 = 1.2451e-04
Loss = 3.3061e-01, PNorm = 63.1224, GNorm = 1.2920, lr_0 = 1.2443e-04
Loss = 3.5308e-01, PNorm = 63.1232, GNorm = 1.1408, lr_0 = 1.2434e-04
Loss = 3.9646e-01, PNorm = 63.1225, GNorm = 1.9118, lr_0 = 1.2426e-04
Loss = 3.4267e-01, PNorm = 63.1240, GNorm = 1.2868, lr_0 = 1.2417e-04
Loss = 3.4572e-01, PNorm = 63.1257, GNorm = 1.7335, lr_0 = 1.2409e-04
Loss = 3.3264e-01, PNorm = 63.1270, GNorm = 1.3848, lr_0 = 1.2400e-04
Loss = 3.6939e-01, PNorm = 63.1280, GNorm = 2.1843, lr_0 = 1.2392e-04
Loss = 2.7839e-01, PNorm = 63.1301, GNorm = 1.2615, lr_0 = 1.2383e-04
Loss = 3.4342e-01, PNorm = 63.1313, GNorm = 1.2825, lr_0 = 1.2375e-04
Loss = 3.4268e-01, PNorm = 63.1319, GNorm = 1.6770, lr_0 = 1.2366e-04
Loss = 3.3834e-01, PNorm = 63.1328, GNorm = 1.5610, lr_0 = 1.2358e-04
Loss = 3.4838e-01, PNorm = 63.1334, GNorm = 1.4593, lr_0 = 1.2349e-04
Loss = 3.3825e-01, PNorm = 63.1340, GNorm = 1.3056, lr_0 = 1.2341e-04
Loss = 3.3722e-01, PNorm = 63.1354, GNorm = 1.6265, lr_0 = 1.2332e-04
Loss = 3.8961e-01, PNorm = 63.1365, GNorm = 1.2975, lr_0 = 1.2324e-04
Loss = 3.5942e-01, PNorm = 63.1368, GNorm = 1.1596, lr_0 = 1.2315e-04
Loss = 3.6786e-01, PNorm = 63.1386, GNorm = 1.6135, lr_0 = 1.2307e-04
Loss = 3.4358e-01, PNorm = 63.1397, GNorm = 1.6777, lr_0 = 1.2298e-04
Loss = 3.2040e-01, PNorm = 63.1386, GNorm = 1.5550, lr_0 = 1.2290e-04
Loss = 3.5019e-01, PNorm = 63.1378, GNorm = 1.8928, lr_0 = 1.2282e-04
Loss = 3.0738e-01, PNorm = 63.1383, GNorm = 1.9620, lr_0 = 1.2273e-04
Loss = 3.4548e-01, PNorm = 63.1396, GNorm = 1.4223, lr_0 = 1.2265e-04
Loss = 3.4636e-01, PNorm = 63.1417, GNorm = 1.7904, lr_0 = 1.2256e-04
Loss = 3.3208e-01, PNorm = 63.1424, GNorm = 1.7753, lr_0 = 1.2248e-04
Loss = 3.3441e-01, PNorm = 63.1435, GNorm = 1.1847, lr_0 = 1.2240e-04
Loss = 4.0074e-01, PNorm = 63.1445, GNorm = 2.7865, lr_0 = 1.2231e-04
Loss = 3.5750e-01, PNorm = 63.1459, GNorm = 1.6914, lr_0 = 1.2223e-04
Loss = 3.3495e-01, PNorm = 63.1475, GNorm = 1.2314, lr_0 = 1.2214e-04
Loss = 3.3124e-01, PNorm = 63.1503, GNorm = 1.3050, lr_0 = 1.2206e-04
Loss = 3.1666e-01, PNorm = 63.1532, GNorm = 1.2015, lr_0 = 1.2198e-04
Loss = 3.1045e-01, PNorm = 63.1536, GNorm = 1.9940, lr_0 = 1.2189e-04
Loss = 3.2712e-01, PNorm = 63.1539, GNorm = 1.1405, lr_0 = 1.2181e-04
Loss = 3.4114e-01, PNorm = 63.1534, GNorm = 1.5566, lr_0 = 1.2173e-04
Loss = 3.5747e-01, PNorm = 63.1541, GNorm = 1.4727, lr_0 = 1.2164e-04
Loss = 3.1175e-01, PNorm = 63.1558, GNorm = 1.7915, lr_0 = 1.2156e-04
Loss = 3.5274e-01, PNorm = 63.1582, GNorm = 1.8228, lr_0 = 1.2148e-04
Loss = 4.1478e-01, PNorm = 63.1582, GNorm = 1.8475, lr_0 = 1.2139e-04
Loss = 3.3874e-01, PNorm = 63.1582, GNorm = 1.5734, lr_0 = 1.2131e-04
Loss = 3.7332e-01, PNorm = 63.1602, GNorm = 1.7441, lr_0 = 1.2123e-04
Loss = 3.0559e-01, PNorm = 63.1627, GNorm = 1.1037, lr_0 = 1.2114e-04
Loss = 3.3198e-01, PNorm = 63.1644, GNorm = 2.2771, lr_0 = 1.2106e-04
Loss = 3.6497e-01, PNorm = 63.1676, GNorm = 1.7924, lr_0 = 1.2098e-04
Loss = 3.1136e-01, PNorm = 63.1685, GNorm = 1.2313, lr_0 = 1.2090e-04
Loss = 4.2123e-01, PNorm = 63.1693, GNorm = 1.5598, lr_0 = 1.2081e-04
Loss = 3.7488e-01, PNorm = 63.1707, GNorm = 1.8180, lr_0 = 1.2073e-04
Loss = 4.1469e-01, PNorm = 63.1728, GNorm = 1.5302, lr_0 = 1.2065e-04
Loss = 3.0149e-01, PNorm = 63.1739, GNorm = 1.1539, lr_0 = 1.2056e-04
Loss = 3.1275e-01, PNorm = 63.1755, GNorm = 1.9636, lr_0 = 1.2048e-04
Loss = 4.0515e-01, PNorm = 63.1765, GNorm = 1.5044, lr_0 = 1.2040e-04
Loss = 3.6586e-01, PNorm = 63.1758, GNorm = 1.5954, lr_0 = 1.2032e-04
Loss = 3.5936e-01, PNorm = 63.1764, GNorm = 2.0677, lr_0 = 1.2023e-04
Loss = 3.5171e-01, PNorm = 63.1782, GNorm = 1.5422, lr_0 = 1.2015e-04
Loss = 3.5433e-01, PNorm = 63.1800, GNorm = 1.5051, lr_0 = 1.2007e-04
Loss = 3.6676e-01, PNorm = 63.1815, GNorm = 1.2102, lr_0 = 1.1999e-04
Loss = 3.1931e-01, PNorm = 63.1832, GNorm = 1.3794, lr_0 = 1.1991e-04
Loss = 3.8769e-01, PNorm = 63.1842, GNorm = 1.9256, lr_0 = 1.1982e-04
Loss = 3.0698e-01, PNorm = 63.1837, GNorm = 1.2489, lr_0 = 1.1974e-04
Loss = 3.5401e-01, PNorm = 63.1843, GNorm = 1.7251, lr_0 = 1.1966e-04
Loss = 3.3853e-01, PNorm = 63.1842, GNorm = 1.8165, lr_0 = 1.1958e-04
Loss = 3.5731e-01, PNorm = 63.1848, GNorm = 1.5025, lr_0 = 1.1950e-04
Loss = 3.6788e-01, PNorm = 63.1860, GNorm = 1.4001, lr_0 = 1.1941e-04
Loss = 3.3083e-01, PNorm = 63.1891, GNorm = 1.5610, lr_0 = 1.1933e-04
Loss = 3.3862e-01, PNorm = 63.1897, GNorm = 1.4038, lr_0 = 1.1925e-04
Loss = 3.8369e-01, PNorm = 63.1886, GNorm = 1.6774, lr_0 = 1.1917e-04
Loss = 3.4313e-01, PNorm = 63.1903, GNorm = 1.5279, lr_0 = 1.1909e-04
Loss = 3.3663e-01, PNorm = 63.1936, GNorm = 1.8046, lr_0 = 1.1901e-04
Loss = 3.5409e-01, PNorm = 63.1935, GNorm = 3.5311, lr_0 = 1.1892e-04
Loss = 3.8480e-01, PNorm = 63.1928, GNorm = 1.8641, lr_0 = 1.1884e-04
Loss = 3.0641e-01, PNorm = 63.1951, GNorm = 1.5988, lr_0 = 1.1876e-04
Loss = 3.8460e-01, PNorm = 63.1965, GNorm = 1.7605, lr_0 = 1.1868e-04
Loss = 3.7338e-01, PNorm = 63.1959, GNorm = 1.5612, lr_0 = 1.1860e-04
Loss = 3.5769e-01, PNorm = 63.1963, GNorm = 1.6736, lr_0 = 1.1852e-04
Loss = 3.8328e-01, PNorm = 63.1980, GNorm = 1.5783, lr_0 = 1.1844e-04
Loss = 3.6500e-01, PNorm = 63.1995, GNorm = 1.4065, lr_0 = 1.1835e-04
Loss = 3.2327e-01, PNorm = 63.2002, GNorm = 1.9535, lr_0 = 1.1827e-04
Loss = 3.4138e-01, PNorm = 63.2007, GNorm = 1.2674, lr_0 = 1.1819e-04
Loss = 2.9879e-01, PNorm = 63.2006, GNorm = 1.2042, lr_0 = 1.1811e-04
Loss = 3.0254e-01, PNorm = 63.2013, GNorm = 1.3369, lr_0 = 1.1803e-04
Loss = 3.1578e-01, PNorm = 63.2015, GNorm = 1.1538, lr_0 = 1.1795e-04
Loss = 3.8117e-01, PNorm = 63.2011, GNorm = 2.3386, lr_0 = 1.1787e-04
Validation mae = 0.111025
Epoch 28
Loss = 3.7684e-01, PNorm = 63.2013, GNorm = 2.4629, lr_0 = 1.1779e-04
Loss = 3.2966e-01, PNorm = 63.2039, GNorm = 1.7407, lr_0 = 1.1771e-04
Loss = 3.7212e-01, PNorm = 63.2056, GNorm = 1.1931, lr_0 = 1.1763e-04
Loss = 3.4775e-01, PNorm = 63.2067, GNorm = 1.3607, lr_0 = 1.1755e-04
Loss = 3.1843e-01, PNorm = 63.2087, GNorm = 1.0395, lr_0 = 1.1747e-04
Loss = 3.3405e-01, PNorm = 63.2108, GNorm = 1.3969, lr_0 = 1.1739e-04
Loss = 3.8041e-01, PNorm = 63.2117, GNorm = 2.2086, lr_0 = 1.1730e-04
Loss = 2.9434e-01, PNorm = 63.2139, GNorm = 1.1434, lr_0 = 1.1722e-04
Loss = 3.3228e-01, PNorm = 63.2152, GNorm = 1.4695, lr_0 = 1.1714e-04
Loss = 3.4912e-01, PNorm = 63.2164, GNorm = 1.4507, lr_0 = 1.1706e-04
Loss = 3.8551e-01, PNorm = 63.2163, GNorm = 1.6077, lr_0 = 1.1698e-04
Loss = 4.1241e-01, PNorm = 63.2166, GNorm = 1.5836, lr_0 = 1.1690e-04
Loss = 3.1358e-01, PNorm = 63.2190, GNorm = 1.6327, lr_0 = 1.1682e-04
Loss = 3.4935e-01, PNorm = 63.2207, GNorm = 1.4649, lr_0 = 1.1674e-04
Loss = 3.4915e-01, PNorm = 63.2210, GNorm = 0.9978, lr_0 = 1.1666e-04
Loss = 3.4439e-01, PNorm = 63.2218, GNorm = 1.6523, lr_0 = 1.1658e-04
Loss = 3.4032e-01, PNorm = 63.2244, GNorm = 1.7817, lr_0 = 1.1650e-04
Loss = 3.2557e-01, PNorm = 63.2243, GNorm = 1.3920, lr_0 = 1.1642e-04
Loss = 3.6715e-01, PNorm = 63.2241, GNorm = 2.0178, lr_0 = 1.1634e-04
Loss = 3.4927e-01, PNorm = 63.2253, GNorm = 1.4308, lr_0 = 1.1626e-04
Loss = 3.2146e-01, PNorm = 63.2264, GNorm = 1.3205, lr_0 = 1.1618e-04
Loss = 3.6429e-01, PNorm = 63.2282, GNorm = 1.6474, lr_0 = 1.1611e-04
Loss = 3.3101e-01, PNorm = 63.2298, GNorm = 1.5700, lr_0 = 1.1603e-04
Loss = 3.3602e-01, PNorm = 63.2307, GNorm = 1.3467, lr_0 = 1.1595e-04
Loss = 3.6983e-01, PNorm = 63.2312, GNorm = 1.1176, lr_0 = 1.1587e-04
Loss = 3.7235e-01, PNorm = 63.2318, GNorm = 1.8459, lr_0 = 1.1579e-04
Loss = 3.3615e-01, PNorm = 63.2317, GNorm = 1.1713, lr_0 = 1.1571e-04
Loss = 3.5086e-01, PNorm = 63.2329, GNorm = 1.6246, lr_0 = 1.1563e-04
Loss = 3.4008e-01, PNorm = 63.2353, GNorm = 1.3185, lr_0 = 1.1555e-04
Loss = 3.2275e-01, PNorm = 63.2358, GNorm = 1.4463, lr_0 = 1.1547e-04
Loss = 3.3224e-01, PNorm = 63.2355, GNorm = 1.3850, lr_0 = 1.1539e-04
Loss = 3.2392e-01, PNorm = 63.2386, GNorm = 1.7831, lr_0 = 1.1531e-04
Loss = 2.9655e-01, PNorm = 63.2409, GNorm = 1.2019, lr_0 = 1.1523e-04
Loss = 3.4085e-01, PNorm = 63.2404, GNorm = 1.3596, lr_0 = 1.1515e-04
Loss = 3.5070e-01, PNorm = 63.2408, GNorm = 1.3055, lr_0 = 1.1508e-04
Loss = 3.5260e-01, PNorm = 63.2410, GNorm = 1.6328, lr_0 = 1.1500e-04
Loss = 3.2154e-01, PNorm = 63.2412, GNorm = 1.4608, lr_0 = 1.1492e-04
Loss = 3.1614e-01, PNorm = 63.2413, GNorm = 1.3884, lr_0 = 1.1484e-04
Loss = 3.2642e-01, PNorm = 63.2419, GNorm = 1.8176, lr_0 = 1.1476e-04
Loss = 3.1130e-01, PNorm = 63.2414, GNorm = 1.6024, lr_0 = 1.1468e-04
Loss = 3.2755e-01, PNorm = 63.2419, GNorm = 1.8793, lr_0 = 1.1460e-04
Loss = 3.2373e-01, PNorm = 63.2435, GNorm = 1.9469, lr_0 = 1.1452e-04
Loss = 3.2792e-01, PNorm = 63.2446, GNorm = 1.2780, lr_0 = 1.1445e-04
Loss = 3.5720e-01, PNorm = 63.2465, GNorm = 1.3359, lr_0 = 1.1437e-04
Loss = 3.1541e-01, PNorm = 63.2468, GNorm = 1.3766, lr_0 = 1.1429e-04
Loss = 3.6472e-01, PNorm = 63.2484, GNorm = 2.0750, lr_0 = 1.1421e-04
Loss = 3.5425e-01, PNorm = 63.2492, GNorm = 1.4167, lr_0 = 1.1413e-04
Loss = 3.6844e-01, PNorm = 63.2498, GNorm = 1.4476, lr_0 = 1.1405e-04
Loss = 3.8504e-01, PNorm = 63.2513, GNorm = 1.5610, lr_0 = 1.1398e-04
Loss = 3.1275e-01, PNorm = 63.2525, GNorm = 1.8872, lr_0 = 1.1390e-04
Loss = 3.9055e-01, PNorm = 63.2513, GNorm = 1.1560, lr_0 = 1.1382e-04
Loss = 3.0510e-01, PNorm = 63.2520, GNorm = 1.1657, lr_0 = 1.1374e-04
Loss = 3.2902e-01, PNorm = 63.2525, GNorm = 1.4302, lr_0 = 1.1366e-04
Loss = 3.4023e-01, PNorm = 63.2532, GNorm = 1.9274, lr_0 = 1.1359e-04
Loss = 3.1070e-01, PNorm = 63.2540, GNorm = 1.3132, lr_0 = 1.1351e-04
Loss = 3.5390e-01, PNorm = 63.2547, GNorm = 1.3687, lr_0 = 1.1343e-04
Loss = 3.4107e-01, PNorm = 63.2568, GNorm = 1.4329, lr_0 = 1.1335e-04
Loss = 3.8025e-01, PNorm = 63.2582, GNorm = 1.2824, lr_0 = 1.1328e-04
Loss = 3.4516e-01, PNorm = 63.2608, GNorm = 1.8509, lr_0 = 1.1320e-04
Loss = 3.3535e-01, PNorm = 63.2633, GNorm = 1.8229, lr_0 = 1.1312e-04
Loss = 3.8742e-01, PNorm = 63.2633, GNorm = 1.2795, lr_0 = 1.1304e-04
Loss = 3.5326e-01, PNorm = 63.2641, GNorm = 1.8571, lr_0 = 1.1297e-04
Loss = 3.2709e-01, PNorm = 63.2662, GNorm = 1.2983, lr_0 = 1.1289e-04
Loss = 3.3164e-01, PNorm = 63.2667, GNorm = 1.2546, lr_0 = 1.1281e-04
Loss = 3.0244e-01, PNorm = 63.2678, GNorm = 1.5602, lr_0 = 1.1273e-04
Loss = 3.4851e-01, PNorm = 63.2693, GNorm = 2.0187, lr_0 = 1.1266e-04
Loss = 3.5813e-01, PNorm = 63.2703, GNorm = 1.9893, lr_0 = 1.1258e-04
Loss = 3.7460e-01, PNorm = 63.2701, GNorm = 1.6801, lr_0 = 1.1250e-04
Loss = 2.8009e-01, PNorm = 63.2716, GNorm = 1.6496, lr_0 = 1.1243e-04
Loss = 3.7143e-01, PNorm = 63.2722, GNorm = 1.6481, lr_0 = 1.1235e-04
Loss = 3.8801e-01, PNorm = 63.2720, GNorm = 1.3916, lr_0 = 1.1227e-04
Loss = 3.7561e-01, PNorm = 63.2717, GNorm = 1.7194, lr_0 = 1.1219e-04
Loss = 2.9000e-01, PNorm = 63.2732, GNorm = 1.4470, lr_0 = 1.1212e-04
Loss = 3.7532e-01, PNorm = 63.2741, GNorm = 1.3078, lr_0 = 1.1204e-04
Loss = 3.2910e-01, PNorm = 63.2739, GNorm = 1.6966, lr_0 = 1.1196e-04
Loss = 2.9601e-01, PNorm = 63.2746, GNorm = 1.3288, lr_0 = 1.1189e-04
Loss = 3.4363e-01, PNorm = 63.2748, GNorm = 1.9747, lr_0 = 1.1181e-04
Loss = 3.3918e-01, PNorm = 63.2752, GNorm = 1.3521, lr_0 = 1.1173e-04
Loss = 3.1061e-01, PNorm = 63.2754, GNorm = 1.3161, lr_0 = 1.1166e-04
Loss = 3.3897e-01, PNorm = 63.2765, GNorm = 1.8780, lr_0 = 1.1158e-04
Loss = 3.2328e-01, PNorm = 63.2790, GNorm = 1.2826, lr_0 = 1.1150e-04
Loss = 4.0732e-01, PNorm = 63.2800, GNorm = 1.6558, lr_0 = 1.1143e-04
Loss = 3.5293e-01, PNorm = 63.2815, GNorm = 1.7770, lr_0 = 1.1135e-04
Loss = 3.2209e-01, PNorm = 63.2820, GNorm = 2.0067, lr_0 = 1.1128e-04
Loss = 3.3334e-01, PNorm = 63.2819, GNorm = 1.2832, lr_0 = 1.1120e-04
Loss = 3.7766e-01, PNorm = 63.2831, GNorm = 1.7431, lr_0 = 1.1112e-04
Loss = 3.3526e-01, PNorm = 63.2838, GNorm = 0.9872, lr_0 = 1.1105e-04
Loss = 3.5574e-01, PNorm = 63.2828, GNorm = 1.2905, lr_0 = 1.1097e-04
Loss = 3.8725e-01, PNorm = 63.2839, GNorm = 1.9959, lr_0 = 1.1089e-04
Loss = 3.2530e-01, PNorm = 63.2861, GNorm = 1.3438, lr_0 = 1.1082e-04
Loss = 3.3390e-01, PNorm = 63.2867, GNorm = 1.1516, lr_0 = 1.1074e-04
Loss = 3.2294e-01, PNorm = 63.2865, GNorm = 1.0458, lr_0 = 1.1067e-04
Loss = 3.2051e-01, PNorm = 63.2883, GNorm = 1.1089, lr_0 = 1.1059e-04
Loss = 3.1168e-01, PNorm = 63.2907, GNorm = 1.2212, lr_0 = 1.1052e-04
Loss = 3.5502e-01, PNorm = 63.2916, GNorm = 1.5749, lr_0 = 1.1044e-04
Loss = 3.0840e-01, PNorm = 63.2923, GNorm = 1.3942, lr_0 = 1.1036e-04
Loss = 3.1786e-01, PNorm = 63.2930, GNorm = 1.1792, lr_0 = 1.1029e-04
Loss = 3.9519e-01, PNorm = 63.2935, GNorm = 1.3657, lr_0 = 1.1021e-04
Loss = 3.4480e-01, PNorm = 63.2952, GNorm = 1.4779, lr_0 = 1.1014e-04
Loss = 3.0816e-01, PNorm = 63.2976, GNorm = 1.6933, lr_0 = 1.1006e-04
Loss = 3.5300e-01, PNorm = 63.2974, GNorm = 1.2887, lr_0 = 1.0999e-04
Loss = 3.5129e-01, PNorm = 63.2990, GNorm = 1.2433, lr_0 = 1.0991e-04
Loss = 3.3272e-01, PNorm = 63.3012, GNorm = 1.2929, lr_0 = 1.0984e-04
Loss = 3.4029e-01, PNorm = 63.3018, GNorm = 1.3391, lr_0 = 1.0976e-04
Loss = 3.1285e-01, PNorm = 63.3028, GNorm = 1.2147, lr_0 = 1.0969e-04
Loss = 3.4497e-01, PNorm = 63.3035, GNorm = 1.2617, lr_0 = 1.0961e-04
Loss = 3.7917e-01, PNorm = 63.3032, GNorm = 1.7177, lr_0 = 1.0954e-04
Loss = 3.5010e-01, PNorm = 63.3045, GNorm = 1.7323, lr_0 = 1.0946e-04
Loss = 3.3008e-01, PNorm = 63.3068, GNorm = 1.5356, lr_0 = 1.0939e-04
Loss = 3.4352e-01, PNorm = 63.3063, GNorm = 1.4503, lr_0 = 1.0931e-04
Loss = 3.5565e-01, PNorm = 63.3045, GNorm = 1.3289, lr_0 = 1.0924e-04
Loss = 3.5261e-01, PNorm = 63.3046, GNorm = 1.0827, lr_0 = 1.0916e-04
Loss = 3.5662e-01, PNorm = 63.3052, GNorm = 1.3211, lr_0 = 1.0909e-04
Loss = 3.6738e-01, PNorm = 63.3060, GNorm = 2.1063, lr_0 = 1.0901e-04
Loss = 3.3555e-01, PNorm = 63.3077, GNorm = 1.4936, lr_0 = 1.0894e-04
Loss = 3.5448e-01, PNorm = 63.3073, GNorm = 1.6110, lr_0 = 1.0886e-04
Loss = 3.2122e-01, PNorm = 63.3084, GNorm = 1.4265, lr_0 = 1.0879e-04
Loss = 3.5277e-01, PNorm = 63.3095, GNorm = 1.7105, lr_0 = 1.0871e-04
Loss = 3.7441e-01, PNorm = 63.3112, GNorm = 1.9014, lr_0 = 1.0864e-04
Loss = 3.8961e-01, PNorm = 63.3123, GNorm = 1.3548, lr_0 = 1.0856e-04
Validation mae = 0.110821
Epoch 29
Loss = 3.7832e-01, PNorm = 63.3137, GNorm = 1.5279, lr_0 = 1.0849e-04
Loss = 3.2838e-01, PNorm = 63.3155, GNorm = 1.5398, lr_0 = 1.0841e-04
Loss = 3.1159e-01, PNorm = 63.3160, GNorm = 1.3783, lr_0 = 1.0834e-04
Loss = 3.1874e-01, PNorm = 63.3161, GNorm = 1.2493, lr_0 = 1.0827e-04
Loss = 3.2469e-01, PNorm = 63.3159, GNorm = 1.0818, lr_0 = 1.0819e-04
Loss = 3.2524e-01, PNorm = 63.3158, GNorm = 1.6282, lr_0 = 1.0812e-04
Loss = 3.1379e-01, PNorm = 63.3165, GNorm = 1.6348, lr_0 = 1.0804e-04
Loss = 3.2585e-01, PNorm = 63.3187, GNorm = 1.3977, lr_0 = 1.0797e-04
Loss = 3.5488e-01, PNorm = 63.3190, GNorm = 1.6433, lr_0 = 1.0790e-04
Loss = 3.3211e-01, PNorm = 63.3193, GNorm = 1.5588, lr_0 = 1.0782e-04
Loss = 3.1147e-01, PNorm = 63.3192, GNorm = 1.3999, lr_0 = 1.0775e-04
Loss = 3.9075e-01, PNorm = 63.3207, GNorm = 2.3232, lr_0 = 1.0767e-04
Loss = 3.5934e-01, PNorm = 63.3218, GNorm = 1.8527, lr_0 = 1.0760e-04
Loss = 3.6675e-01, PNorm = 63.3221, GNorm = 1.3300, lr_0 = 1.0753e-04
Loss = 3.4813e-01, PNorm = 63.3230, GNorm = 1.5133, lr_0 = 1.0745e-04
Loss = 3.5498e-01, PNorm = 63.3238, GNorm = 1.7814, lr_0 = 1.0738e-04
Loss = 3.0620e-01, PNorm = 63.3263, GNorm = 1.2802, lr_0 = 1.0731e-04
Loss = 3.4846e-01, PNorm = 63.3276, GNorm = 1.2889, lr_0 = 1.0723e-04
Loss = 3.5496e-01, PNorm = 63.3289, GNorm = 1.8406, lr_0 = 1.0716e-04
Loss = 3.0758e-01, PNorm = 63.3291, GNorm = 1.7510, lr_0 = 1.0709e-04
Loss = 4.2063e-01, PNorm = 63.3302, GNorm = 3.2388, lr_0 = 1.0701e-04
Loss = 3.4704e-01, PNorm = 63.3310, GNorm = 1.6449, lr_0 = 1.0694e-04
Loss = 3.7992e-01, PNorm = 63.3324, GNorm = 1.6163, lr_0 = 1.0687e-04
Loss = 3.2964e-01, PNorm = 63.3318, GNorm = 1.4606, lr_0 = 1.0679e-04
Loss = 3.4618e-01, PNorm = 63.3323, GNorm = 1.3805, lr_0 = 1.0672e-04
Loss = 3.4556e-01, PNorm = 63.3339, GNorm = 0.9980, lr_0 = 1.0665e-04
Loss = 3.2668e-01, PNorm = 63.3345, GNorm = 1.5331, lr_0 = 1.0657e-04
Loss = 3.9296e-01, PNorm = 63.3347, GNorm = 1.6894, lr_0 = 1.0650e-04
Loss = 3.1917e-01, PNorm = 63.3351, GNorm = 1.6570, lr_0 = 1.0643e-04
Loss = 2.8506e-01, PNorm = 63.3357, GNorm = 1.2352, lr_0 = 1.0635e-04
Loss = 3.3245e-01, PNorm = 63.3361, GNorm = 1.8222, lr_0 = 1.0628e-04
Loss = 3.5123e-01, PNorm = 63.3371, GNorm = 1.4451, lr_0 = 1.0621e-04
Loss = 2.9154e-01, PNorm = 63.3388, GNorm = 1.6916, lr_0 = 1.0614e-04
Loss = 3.4736e-01, PNorm = 63.3401, GNorm = 1.4147, lr_0 = 1.0606e-04
Loss = 3.3760e-01, PNorm = 63.3426, GNorm = 1.1328, lr_0 = 1.0599e-04
Loss = 3.6394e-01, PNorm = 63.3449, GNorm = 1.1493, lr_0 = 1.0592e-04
Loss = 3.8194e-01, PNorm = 63.3467, GNorm = 1.7497, lr_0 = 1.0585e-04
Loss = 3.1540e-01, PNorm = 63.3477, GNorm = 1.2626, lr_0 = 1.0577e-04
Loss = 2.8614e-01, PNorm = 63.3483, GNorm = 1.2449, lr_0 = 1.0570e-04
Loss = 3.6136e-01, PNorm = 63.3488, GNorm = 1.8570, lr_0 = 1.0563e-04
Loss = 3.5080e-01, PNorm = 63.3485, GNorm = 1.5294, lr_0 = 1.0556e-04
Loss = 3.3476e-01, PNorm = 63.3496, GNorm = 1.4139, lr_0 = 1.0548e-04
Loss = 3.5894e-01, PNorm = 63.3513, GNorm = 1.3547, lr_0 = 1.0541e-04
Loss = 3.2214e-01, PNorm = 63.3518, GNorm = 1.5159, lr_0 = 1.0534e-04
Loss = 3.2334e-01, PNorm = 63.3515, GNorm = 1.9911, lr_0 = 1.0527e-04
Loss = 4.0444e-01, PNorm = 63.3513, GNorm = 1.4988, lr_0 = 1.0519e-04
Loss = 3.5937e-01, PNorm = 63.3514, GNorm = 1.6573, lr_0 = 1.0512e-04
Loss = 3.4733e-01, PNorm = 63.3538, GNorm = 1.3673, lr_0 = 1.0505e-04
Loss = 3.3386e-01, PNorm = 63.3548, GNorm = 1.7798, lr_0 = 1.0498e-04
Loss = 3.2337e-01, PNorm = 63.3562, GNorm = 1.4347, lr_0 = 1.0491e-04
Loss = 3.6277e-01, PNorm = 63.3574, GNorm = 1.3990, lr_0 = 1.0483e-04
Loss = 3.4712e-01, PNorm = 63.3595, GNorm = 1.4275, lr_0 = 1.0476e-04
Loss = 3.1359e-01, PNorm = 63.3618, GNorm = 1.4718, lr_0 = 1.0469e-04
Loss = 3.1957e-01, PNorm = 63.3635, GNorm = 1.1644, lr_0 = 1.0462e-04
Loss = 3.4133e-01, PNorm = 63.3633, GNorm = 1.1073, lr_0 = 1.0455e-04
Loss = 3.5552e-01, PNorm = 63.3624, GNorm = 1.4259, lr_0 = 1.0448e-04
Loss = 3.4685e-01, PNorm = 63.3623, GNorm = 1.1456, lr_0 = 1.0440e-04
Loss = 3.4179e-01, PNorm = 63.3630, GNorm = 1.3427, lr_0 = 1.0433e-04
Loss = 3.2658e-01, PNorm = 63.3647, GNorm = 1.7458, lr_0 = 1.0426e-04
Loss = 3.1994e-01, PNorm = 63.3668, GNorm = 1.8840, lr_0 = 1.0419e-04
Loss = 3.9490e-01, PNorm = 63.3672, GNorm = 1.7484, lr_0 = 1.0412e-04
Loss = 3.9875e-01, PNorm = 63.3676, GNorm = 1.8734, lr_0 = 1.0405e-04
Loss = 3.9961e-01, PNorm = 63.3687, GNorm = 1.5591, lr_0 = 1.0398e-04
Loss = 3.4986e-01, PNorm = 63.3705, GNorm = 1.2389, lr_0 = 1.0391e-04
Loss = 3.2038e-01, PNorm = 63.3723, GNorm = 1.8516, lr_0 = 1.0383e-04
Loss = 3.4794e-01, PNorm = 63.3723, GNorm = 1.9101, lr_0 = 1.0376e-04
Loss = 3.9775e-01, PNorm = 63.3729, GNorm = 2.1617, lr_0 = 1.0369e-04
Loss = 2.9434e-01, PNorm = 63.3739, GNorm = 1.5156, lr_0 = 1.0362e-04
Loss = 3.0735e-01, PNorm = 63.3737, GNorm = 1.6418, lr_0 = 1.0355e-04
Loss = 3.0295e-01, PNorm = 63.3741, GNorm = 1.0093, lr_0 = 1.0348e-04
Loss = 3.5224e-01, PNorm = 63.3744, GNorm = 1.5011, lr_0 = 1.0341e-04
Loss = 3.4245e-01, PNorm = 63.3752, GNorm = 1.4048, lr_0 = 1.0334e-04
Loss = 3.4239e-01, PNorm = 63.3757, GNorm = 1.7850, lr_0 = 1.0327e-04
Loss = 3.0861e-01, PNorm = 63.3762, GNorm = 1.7622, lr_0 = 1.0320e-04
Loss = 3.6750e-01, PNorm = 63.3776, GNorm = 1.4555, lr_0 = 1.0312e-04
Loss = 3.5042e-01, PNorm = 63.3780, GNorm = 1.7771, lr_0 = 1.0305e-04
Loss = 3.0510e-01, PNorm = 63.3784, GNorm = 1.0518, lr_0 = 1.0298e-04
Loss = 3.5212e-01, PNorm = 63.3796, GNorm = 1.6686, lr_0 = 1.0291e-04
Loss = 3.1881e-01, PNorm = 63.3807, GNorm = 1.1239, lr_0 = 1.0284e-04
Loss = 3.2714e-01, PNorm = 63.3804, GNorm = 1.4000, lr_0 = 1.0277e-04
Loss = 3.4941e-01, PNorm = 63.3795, GNorm = 2.0891, lr_0 = 1.0270e-04
Loss = 3.4763e-01, PNorm = 63.3797, GNorm = 1.6124, lr_0 = 1.0263e-04
Loss = 3.8702e-01, PNorm = 63.3801, GNorm = 1.4668, lr_0 = 1.0256e-04
Loss = 3.2233e-01, PNorm = 63.3809, GNorm = 1.6272, lr_0 = 1.0249e-04
Loss = 3.7593e-01, PNorm = 63.3813, GNorm = 1.4951, lr_0 = 1.0242e-04
Loss = 2.6984e-01, PNorm = 63.3828, GNorm = 1.4600, lr_0 = 1.0235e-04
Loss = 3.5912e-01, PNorm = 63.3843, GNorm = 1.4077, lr_0 = 1.0228e-04
Loss = 3.0550e-01, PNorm = 63.3835, GNorm = 1.3533, lr_0 = 1.0221e-04
Loss = 3.3990e-01, PNorm = 63.3834, GNorm = 1.1906, lr_0 = 1.0214e-04
Loss = 3.7149e-01, PNorm = 63.3839, GNorm = 1.1806, lr_0 = 1.0207e-04
Loss = 4.1374e-01, PNorm = 63.3848, GNorm = 2.2106, lr_0 = 1.0200e-04
Loss = 3.6287e-01, PNorm = 63.3863, GNorm = 1.4012, lr_0 = 1.0193e-04
Loss = 3.7936e-01, PNorm = 63.3884, GNorm = 1.6773, lr_0 = 1.0186e-04
Loss = 3.3596e-01, PNorm = 63.3898, GNorm = 1.3281, lr_0 = 1.0179e-04
Loss = 3.5566e-01, PNorm = 63.3896, GNorm = 1.3913, lr_0 = 1.0172e-04
Loss = 3.5297e-01, PNorm = 63.3899, GNorm = 1.3975, lr_0 = 1.0165e-04
Loss = 3.2250e-01, PNorm = 63.3912, GNorm = 1.3905, lr_0 = 1.0158e-04
Loss = 3.0120e-01, PNorm = 63.3927, GNorm = 1.8574, lr_0 = 1.0151e-04
Loss = 3.5144e-01, PNorm = 63.3925, GNorm = 1.5791, lr_0 = 1.0144e-04
Loss = 3.3576e-01, PNorm = 63.3938, GNorm = 1.7041, lr_0 = 1.0137e-04
Loss = 3.5845e-01, PNorm = 63.3958, GNorm = 1.3868, lr_0 = 1.0130e-04
Loss = 3.4111e-01, PNorm = 63.3968, GNorm = 1.4696, lr_0 = 1.0123e-04
Loss = 4.0857e-01, PNorm = 63.3983, GNorm = 1.9175, lr_0 = 1.0116e-04
Loss = 3.6128e-01, PNorm = 63.3984, GNorm = 1.5369, lr_0 = 1.0110e-04
Loss = 3.0266e-01, PNorm = 63.3990, GNorm = 1.0460, lr_0 = 1.0103e-04
Loss = 2.9380e-01, PNorm = 63.4005, GNorm = 1.1918, lr_0 = 1.0096e-04
Loss = 3.5538e-01, PNorm = 63.4010, GNorm = 1.4819, lr_0 = 1.0089e-04
Loss = 3.1361e-01, PNorm = 63.4012, GNorm = 1.4671, lr_0 = 1.0082e-04
Loss = 3.3089e-01, PNorm = 63.4021, GNorm = 1.2397, lr_0 = 1.0075e-04
Loss = 3.1388e-01, PNorm = 63.4035, GNorm = 1.0786, lr_0 = 1.0068e-04
Loss = 3.6479e-01, PNorm = 63.4039, GNorm = 1.2599, lr_0 = 1.0061e-04
Loss = 3.4698e-01, PNorm = 63.4035, GNorm = 1.2328, lr_0 = 1.0054e-04
Loss = 3.4875e-01, PNorm = 63.4038, GNorm = 1.6485, lr_0 = 1.0047e-04
Loss = 3.3419e-01, PNorm = 63.4046, GNorm = 1.3673, lr_0 = 1.0041e-04
Loss = 4.2074e-01, PNorm = 63.4059, GNorm = 1.6004, lr_0 = 1.0034e-04
Loss = 3.1596e-01, PNorm = 63.4074, GNorm = 1.5047, lr_0 = 1.0027e-04
Loss = 3.5460e-01, PNorm = 63.4069, GNorm = 1.4773, lr_0 = 1.0020e-04
Loss = 3.1728e-01, PNorm = 63.4071, GNorm = 1.4639, lr_0 = 1.0013e-04
Loss = 3.5058e-01, PNorm = 63.4082, GNorm = 1.6641, lr_0 = 1.0006e-04
Loss = 3.4578e-01, PNorm = 63.4098, GNorm = 1.4096, lr_0 = 1.0000e-04
Validation mae = 0.111513
Model 0 best validation mae = 0.110650 on epoch 25
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110571
Ensemble test mae = 0.110571
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0364e+00, PNorm = 38.1703, GNorm = 3.4805, lr_0 = 1.0413e-04
Loss = 1.0833e+00, PNorm = 38.1701, GNorm = 2.9560, lr_0 = 1.0788e-04
Loss = 1.1063e+00, PNorm = 38.1702, GNorm = 2.8401, lr_0 = 1.1163e-04
Loss = 8.7919e-01, PNorm = 38.1705, GNorm = 4.4629, lr_0 = 1.1537e-04
Loss = 8.7892e-01, PNorm = 38.1713, GNorm = 2.1332, lr_0 = 1.1913e-04
Loss = 8.8577e-01, PNorm = 38.1722, GNorm = 3.2392, lr_0 = 1.2287e-04
Loss = 9.6307e-01, PNorm = 38.1728, GNorm = 2.7127, lr_0 = 1.2663e-04
Loss = 8.5885e-01, PNorm = 38.1736, GNorm = 2.7242, lr_0 = 1.3038e-04
Loss = 9.5239e-01, PNorm = 38.1747, GNorm = 2.2800, lr_0 = 1.3413e-04
Loss = 8.2229e-01, PNorm = 38.1754, GNorm = 3.2950, lr_0 = 1.3788e-04
Loss = 8.5803e-01, PNorm = 38.1764, GNorm = 1.7762, lr_0 = 1.4163e-04
Loss = 9.2138e-01, PNorm = 38.1777, GNorm = 2.6963, lr_0 = 1.4537e-04
Loss = 7.6841e-01, PNorm = 38.1793, GNorm = 2.6724, lr_0 = 1.4913e-04
Loss = 8.7698e-01, PNorm = 38.1813, GNorm = 2.3732, lr_0 = 1.5288e-04
Loss = 8.9039e-01, PNorm = 38.1838, GNorm = 2.8500, lr_0 = 1.5662e-04
Loss = 8.6116e-01, PNorm = 38.1861, GNorm = 3.4524, lr_0 = 1.6038e-04
Loss = 7.6300e-01, PNorm = 38.1880, GNorm = 2.1089, lr_0 = 1.6412e-04
Loss = 7.4215e-01, PNorm = 38.1899, GNorm = 6.3425, lr_0 = 1.6788e-04
Loss = 7.9094e-01, PNorm = 38.1909, GNorm = 2.4972, lr_0 = 1.7163e-04
Loss = 9.9113e-01, PNorm = 38.1930, GNorm = 7.0254, lr_0 = 1.7538e-04
Loss = 8.5708e-01, PNorm = 38.1951, GNorm = 5.9979, lr_0 = 1.7913e-04
Loss = 8.1069e-01, PNorm = 38.1974, GNorm = 2.6932, lr_0 = 1.8288e-04
Loss = 7.0354e-01, PNorm = 38.1996, GNorm = 1.8157, lr_0 = 1.8662e-04
Loss = 7.8011e-01, PNorm = 38.2029, GNorm = 3.1540, lr_0 = 1.9038e-04
Loss = 7.0739e-01, PNorm = 38.2061, GNorm = 3.8817, lr_0 = 1.9413e-04
Loss = 7.0906e-01, PNorm = 38.2099, GNorm = 3.0274, lr_0 = 1.9788e-04
Loss = 8.3573e-01, PNorm = 38.2124, GNorm = 1.8065, lr_0 = 2.0163e-04
Loss = 8.3114e-01, PNorm = 38.2161, GNorm = 1.7348, lr_0 = 2.0537e-04
Loss = 7.8455e-01, PNorm = 38.2209, GNorm = 4.3669, lr_0 = 2.0913e-04
Loss = 7.4107e-01, PNorm = 38.2237, GNorm = 3.3937, lr_0 = 2.1288e-04
Loss = 7.4752e-01, PNorm = 38.2262, GNorm = 8.6194, lr_0 = 2.1663e-04
Loss = 7.7648e-01, PNorm = 38.2296, GNorm = 3.5077, lr_0 = 2.2038e-04
Loss = 7.3024e-01, PNorm = 38.2338, GNorm = 5.1554, lr_0 = 2.2412e-04
Loss = 7.7977e-01, PNorm = 38.2371, GNorm = 10.2119, lr_0 = 2.2787e-04
Loss = 6.7202e-01, PNorm = 38.2406, GNorm = 2.5700, lr_0 = 2.3163e-04
Loss = 7.1851e-01, PNorm = 38.2434, GNorm = 4.2978, lr_0 = 2.3538e-04
Loss = 7.3575e-01, PNorm = 38.2469, GNorm = 4.8765, lr_0 = 2.3913e-04
Loss = 7.8430e-01, PNorm = 38.2490, GNorm = 2.9170, lr_0 = 2.4288e-04
Loss = 6.7563e-01, PNorm = 38.2523, GNorm = 4.4569, lr_0 = 2.4662e-04
Loss = 7.2133e-01, PNorm = 38.2573, GNorm = 6.8422, lr_0 = 2.5038e-04
Loss = 7.5349e-01, PNorm = 38.2618, GNorm = 2.7760, lr_0 = 2.5413e-04
Loss = 6.3197e-01, PNorm = 38.2641, GNorm = 5.1637, lr_0 = 2.5788e-04
Loss = 6.6242e-01, PNorm = 38.2670, GNorm = 2.0904, lr_0 = 2.6163e-04
Loss = 6.4440e-01, PNorm = 38.2712, GNorm = 8.9468, lr_0 = 2.6537e-04
Loss = 6.5279e-01, PNorm = 38.2741, GNorm = 2.6148, lr_0 = 2.6912e-04
Loss = 6.0650e-01, PNorm = 38.2787, GNorm = 2.3110, lr_0 = 2.7288e-04
Loss = 6.6897e-01, PNorm = 38.2798, GNorm = 6.8767, lr_0 = 2.7663e-04
Loss = 7.2662e-01, PNorm = 38.2839, GNorm = 4.2529, lr_0 = 2.8038e-04
Loss = 6.9645e-01, PNorm = 38.2875, GNorm = 2.6395, lr_0 = 2.8413e-04
Loss = 6.4933e-01, PNorm = 38.2898, GNorm = 2.3886, lr_0 = 2.8787e-04
Loss = 7.1401e-01, PNorm = 38.2932, GNorm = 2.5946, lr_0 = 2.9163e-04
Loss = 8.4123e-01, PNorm = 38.2958, GNorm = 7.3740, lr_0 = 2.9538e-04
Loss = 6.7953e-01, PNorm = 38.2973, GNorm = 1.9855, lr_0 = 2.9913e-04
Loss = 6.2866e-01, PNorm = 38.3008, GNorm = 1.8629, lr_0 = 3.0288e-04
Loss = 7.2287e-01, PNorm = 38.3044, GNorm = 2.2082, lr_0 = 3.0662e-04
Loss = 7.2811e-01, PNorm = 38.3113, GNorm = 3.5351, lr_0 = 3.1037e-04
Loss = 6.4165e-01, PNorm = 38.3135, GNorm = 1.4467, lr_0 = 3.1413e-04
Loss = 6.5016e-01, PNorm = 38.3170, GNorm = 2.9908, lr_0 = 3.1788e-04
Loss = 7.0184e-01, PNorm = 38.3216, GNorm = 2.3196, lr_0 = 3.2163e-04
Loss = 7.8554e-01, PNorm = 38.3268, GNorm = 3.5826, lr_0 = 3.2538e-04
Loss = 6.8115e-01, PNorm = 38.3334, GNorm = 2.7334, lr_0 = 3.2912e-04
Loss = 6.3455e-01, PNorm = 38.3375, GNorm = 3.0434, lr_0 = 3.3288e-04
Loss = 7.3930e-01, PNorm = 38.3426, GNorm = 4.3652, lr_0 = 3.3663e-04
Loss = 6.0585e-01, PNorm = 38.3481, GNorm = 4.0419, lr_0 = 3.4038e-04
Loss = 6.2174e-01, PNorm = 38.3527, GNorm = 3.3344, lr_0 = 3.4413e-04
Loss = 6.7978e-01, PNorm = 38.3588, GNorm = 2.5416, lr_0 = 3.4787e-04
Loss = 6.5896e-01, PNorm = 38.3605, GNorm = 3.6157, lr_0 = 3.5162e-04
Loss = 6.4626e-01, PNorm = 38.3643, GNorm = 4.8683, lr_0 = 3.5538e-04
Loss = 6.7446e-01, PNorm = 38.3698, GNorm = 2.2045, lr_0 = 3.5913e-04
Loss = 6.7351e-01, PNorm = 38.3744, GNorm = 1.6028, lr_0 = 3.6288e-04
Loss = 7.4544e-01, PNorm = 38.3800, GNorm = 2.9035, lr_0 = 3.6662e-04
Loss = 6.7062e-01, PNorm = 38.3857, GNorm = 2.5481, lr_0 = 3.7037e-04
Loss = 5.4568e-01, PNorm = 38.3933, GNorm = 1.3128, lr_0 = 3.7413e-04
Loss = 6.3921e-01, PNorm = 38.3964, GNorm = 8.3564, lr_0 = 3.7788e-04
Loss = 6.8515e-01, PNorm = 38.3974, GNorm = 1.3689, lr_0 = 3.8163e-04
Loss = 6.3252e-01, PNorm = 38.4031, GNorm = 1.1195, lr_0 = 3.8537e-04
Loss = 5.9164e-01, PNorm = 38.4109, GNorm = 4.9975, lr_0 = 3.8912e-04
Loss = 6.3761e-01, PNorm = 38.4165, GNorm = 3.7545, lr_0 = 3.9287e-04
Loss = 5.9180e-01, PNorm = 38.4224, GNorm = 5.3229, lr_0 = 3.9663e-04
Loss = 6.9025e-01, PNorm = 38.4269, GNorm = 3.4148, lr_0 = 4.0038e-04
Loss = 6.8765e-01, PNorm = 38.4305, GNorm = 3.2450, lr_0 = 4.0413e-04
Loss = 5.5295e-01, PNorm = 38.4387, GNorm = 5.0159, lr_0 = 4.0787e-04
Loss = 6.6567e-01, PNorm = 38.4455, GNorm = 2.0700, lr_0 = 4.1162e-04
Loss = 6.9225e-01, PNorm = 38.4509, GNorm = 6.2277, lr_0 = 4.1537e-04
Loss = 6.5133e-01, PNorm = 38.4544, GNorm = 2.0708, lr_0 = 4.1913e-04
Loss = 6.6217e-01, PNorm = 38.4617, GNorm = 2.5152, lr_0 = 4.2288e-04
Loss = 5.8706e-01, PNorm = 38.4702, GNorm = 3.0468, lr_0 = 4.2662e-04
Loss = 6.2777e-01, PNorm = 38.4718, GNorm = 4.7301, lr_0 = 4.3037e-04
Loss = 6.4275e-01, PNorm = 38.4801, GNorm = 4.5559, lr_0 = 4.3412e-04
Loss = 6.0398e-01, PNorm = 38.4874, GNorm = 8.5429, lr_0 = 4.3788e-04
Loss = 7.3959e-01, PNorm = 38.4947, GNorm = 10.7483, lr_0 = 4.4163e-04
Loss = 6.8738e-01, PNorm = 38.5019, GNorm = 4.1780, lr_0 = 4.4538e-04
Loss = 6.6520e-01, PNorm = 38.5101, GNorm = 4.0190, lr_0 = 4.4912e-04
Loss = 5.4842e-01, PNorm = 38.5174, GNorm = 3.2343, lr_0 = 4.5287e-04
Loss = 7.1568e-01, PNorm = 38.5282, GNorm = 8.3295, lr_0 = 4.5662e-04
Loss = 6.7293e-01, PNorm = 38.5370, GNorm = 4.3431, lr_0 = 4.6038e-04
Loss = 6.2948e-01, PNorm = 38.5423, GNorm = 6.1569, lr_0 = 4.6413e-04
Loss = 6.3616e-01, PNorm = 38.5480, GNorm = 2.1603, lr_0 = 4.6787e-04
Loss = 6.3048e-01, PNorm = 38.5595, GNorm = 6.9716, lr_0 = 4.7162e-04
Loss = 6.2688e-01, PNorm = 38.5648, GNorm = 2.6086, lr_0 = 4.7537e-04
Loss = 5.9912e-01, PNorm = 38.5717, GNorm = 1.5242, lr_0 = 4.7913e-04
Loss = 6.0892e-01, PNorm = 38.5818, GNorm = 3.6708, lr_0 = 4.8288e-04
Loss = 6.5162e-01, PNorm = 38.5902, GNorm = 2.1986, lr_0 = 4.8663e-04
Loss = 6.4542e-01, PNorm = 38.5956, GNorm = 3.8040, lr_0 = 4.9038e-04
Loss = 6.6177e-01, PNorm = 38.6012, GNorm = 4.9901, lr_0 = 4.9412e-04
Loss = 5.7032e-01, PNorm = 38.6097, GNorm = 2.8178, lr_0 = 4.9788e-04
Loss = 7.4331e-01, PNorm = 38.6154, GNorm = 1.8578, lr_0 = 5.0163e-04
Loss = 6.8392e-01, PNorm = 38.6235, GNorm = 2.7273, lr_0 = 5.0538e-04
Loss = 6.9630e-01, PNorm = 38.6329, GNorm = 6.9934, lr_0 = 5.0913e-04
Loss = 7.0539e-01, PNorm = 38.6417, GNorm = 3.3428, lr_0 = 5.1287e-04
Loss = 6.6731e-01, PNorm = 38.6535, GNorm = 3.9634, lr_0 = 5.1663e-04
Loss = 6.0210e-01, PNorm = 38.6625, GNorm = 1.9011, lr_0 = 5.2038e-04
Loss = 6.3326e-01, PNorm = 38.6668, GNorm = 4.1887, lr_0 = 5.2413e-04
Loss = 6.3832e-01, PNorm = 38.6747, GNorm = 1.8587, lr_0 = 5.2788e-04
Loss = 6.5769e-01, PNorm = 38.6884, GNorm = 2.3657, lr_0 = 5.3162e-04
Loss = 7.3278e-01, PNorm = 38.6947, GNorm = 2.2978, lr_0 = 5.3538e-04
Loss = 7.0305e-01, PNorm = 38.7091, GNorm = 2.1509, lr_0 = 5.3912e-04
Loss = 6.4738e-01, PNorm = 38.7234, GNorm = 1.4202, lr_0 = 5.4288e-04
Loss = 6.1619e-01, PNorm = 38.7284, GNorm = 3.3553, lr_0 = 5.4663e-04
Loss = 6.3301e-01, PNorm = 38.7382, GNorm = 4.5904, lr_0 = 5.5038e-04
Validation mae = 0.140168
Epoch 1
Loss = 6.9266e-01, PNorm = 38.7487, GNorm = 2.6273, lr_0 = 5.5413e-04
Loss = 6.6915e-01, PNorm = 38.7627, GNorm = 2.1550, lr_0 = 5.5787e-04
Loss = 5.7741e-01, PNorm = 38.7739, GNorm = 2.6398, lr_0 = 5.6163e-04
Loss = 6.9007e-01, PNorm = 38.7858, GNorm = 2.5617, lr_0 = 5.6538e-04
Loss = 6.0040e-01, PNorm = 38.7978, GNorm = 1.6474, lr_0 = 5.6913e-04
Loss = 5.9156e-01, PNorm = 38.8055, GNorm = 1.5350, lr_0 = 5.7288e-04
Loss = 5.7810e-01, PNorm = 38.8202, GNorm = 3.5704, lr_0 = 5.7662e-04
Loss = 6.5177e-01, PNorm = 38.8239, GNorm = 2.7466, lr_0 = 5.8038e-04
Loss = 6.2863e-01, PNorm = 38.8342, GNorm = 3.9094, lr_0 = 5.8413e-04
Loss = 6.2596e-01, PNorm = 38.8494, GNorm = 1.4918, lr_0 = 5.8788e-04
Loss = 6.3467e-01, PNorm = 38.8623, GNorm = 4.2011, lr_0 = 5.9163e-04
Loss = 6.0185e-01, PNorm = 38.8681, GNorm = 3.4931, lr_0 = 5.9538e-04
Loss = 5.6905e-01, PNorm = 38.8794, GNorm = 4.1797, lr_0 = 5.9913e-04
Loss = 5.7389e-01, PNorm = 38.8908, GNorm = 2.8309, lr_0 = 6.0288e-04
Loss = 6.6878e-01, PNorm = 38.8961, GNorm = 1.5419, lr_0 = 6.0663e-04
Loss = 5.9958e-01, PNorm = 38.9076, GNorm = 1.8013, lr_0 = 6.1038e-04
Loss = 5.5732e-01, PNorm = 38.9175, GNorm = 1.2598, lr_0 = 6.1413e-04
Loss = 6.1434e-01, PNorm = 38.9325, GNorm = 2.4499, lr_0 = 6.1788e-04
Loss = 5.6454e-01, PNorm = 38.9396, GNorm = 2.0270, lr_0 = 6.2163e-04
Loss = 7.6129e-01, PNorm = 38.9522, GNorm = 6.7144, lr_0 = 6.2538e-04
Loss = 6.3319e-01, PNorm = 38.9666, GNorm = 2.1258, lr_0 = 6.2913e-04
Loss = 6.3610e-01, PNorm = 38.9802, GNorm = 2.8223, lr_0 = 6.3288e-04
Loss = 5.9098e-01, PNorm = 38.9960, GNorm = 1.7081, lr_0 = 6.3663e-04
Loss = 5.6613e-01, PNorm = 39.0112, GNorm = 2.3784, lr_0 = 6.4038e-04
Loss = 6.8088e-01, PNorm = 39.0208, GNorm = 3.2548, lr_0 = 6.4413e-04
Loss = 5.7990e-01, PNorm = 39.0365, GNorm = 3.3138, lr_0 = 6.4788e-04
Loss = 5.8382e-01, PNorm = 39.0559, GNorm = 2.0370, lr_0 = 6.5163e-04
Loss = 6.5646e-01, PNorm = 39.0658, GNorm = 4.8429, lr_0 = 6.5538e-04
Loss = 5.7181e-01, PNorm = 39.0746, GNorm = 1.2380, lr_0 = 6.5913e-04
Loss = 6.3733e-01, PNorm = 39.0947, GNorm = 3.2064, lr_0 = 6.6288e-04
Loss = 5.5531e-01, PNorm = 39.1087, GNorm = 1.8906, lr_0 = 6.6663e-04
Loss = 5.8475e-01, PNorm = 39.1210, GNorm = 2.3033, lr_0 = 6.7038e-04
Loss = 5.8678e-01, PNorm = 39.1347, GNorm = 6.8895, lr_0 = 6.7413e-04
Loss = 6.5840e-01, PNorm = 39.1479, GNorm = 1.5312, lr_0 = 6.7788e-04
Loss = 5.7659e-01, PNorm = 39.1659, GNorm = 1.0479, lr_0 = 6.8163e-04
Loss = 6.4215e-01, PNorm = 39.1826, GNorm = 3.1042, lr_0 = 6.8538e-04
Loss = 7.1155e-01, PNorm = 39.1926, GNorm = 1.5349, lr_0 = 6.8913e-04
Loss = 6.8005e-01, PNorm = 39.2083, GNorm = 1.5947, lr_0 = 6.9288e-04
Loss = 6.0857e-01, PNorm = 39.2165, GNorm = 4.9731, lr_0 = 6.9663e-04
Loss = 6.4011e-01, PNorm = 39.2307, GNorm = 9.4500, lr_0 = 7.0038e-04
Loss = 5.5651e-01, PNorm = 39.2434, GNorm = 2.7772, lr_0 = 7.0413e-04
Loss = 5.6645e-01, PNorm = 39.2622, GNorm = 1.8767, lr_0 = 7.0788e-04
Loss = 6.1193e-01, PNorm = 39.2788, GNorm = 3.7508, lr_0 = 7.1163e-04
Loss = 5.1325e-01, PNorm = 39.2857, GNorm = 2.0121, lr_0 = 7.1538e-04
Loss = 6.1778e-01, PNorm = 39.2994, GNorm = 3.5323, lr_0 = 7.1913e-04
Loss = 4.9775e-01, PNorm = 39.3111, GNorm = 1.4745, lr_0 = 7.2288e-04
Loss = 5.1405e-01, PNorm = 39.3258, GNorm = 1.9121, lr_0 = 7.2663e-04
Loss = 5.1176e-01, PNorm = 39.3352, GNorm = 0.9905, lr_0 = 7.3038e-04
Loss = 7.2300e-01, PNorm = 39.3469, GNorm = 3.9867, lr_0 = 7.3413e-04
Loss = 6.2643e-01, PNorm = 39.3624, GNorm = 0.9902, lr_0 = 7.3788e-04
Loss = 5.8691e-01, PNorm = 39.3805, GNorm = 1.6216, lr_0 = 7.4163e-04
Loss = 5.9018e-01, PNorm = 39.3974, GNorm = 3.0722, lr_0 = 7.4538e-04
Loss = 6.4850e-01, PNorm = 39.4122, GNorm = 1.0988, lr_0 = 7.4913e-04
Loss = 4.8143e-01, PNorm = 39.4248, GNorm = 3.8752, lr_0 = 7.5288e-04
Loss = 6.0702e-01, PNorm = 39.4378, GNorm = 4.0399, lr_0 = 7.5663e-04
Loss = 5.1499e-01, PNorm = 39.4526, GNorm = 0.9139, lr_0 = 7.6038e-04
Loss = 5.8080e-01, PNorm = 39.4650, GNorm = 3.1991, lr_0 = 7.6413e-04
Loss = 5.4673e-01, PNorm = 39.4732, GNorm = 0.9406, lr_0 = 7.6788e-04
Loss = 5.2144e-01, PNorm = 39.4885, GNorm = 2.1734, lr_0 = 7.7163e-04
Loss = 6.0027e-01, PNorm = 39.5024, GNorm = 4.6664, lr_0 = 7.7538e-04
Loss = 6.2076e-01, PNorm = 39.5112, GNorm = 3.6343, lr_0 = 7.7913e-04
Loss = 5.7991e-01, PNorm = 39.5301, GNorm = 3.9030, lr_0 = 7.8288e-04
Loss = 5.9381e-01, PNorm = 39.5436, GNorm = 1.9488, lr_0 = 7.8663e-04
Loss = 6.7840e-01, PNorm = 39.5635, GNorm = 2.6689, lr_0 = 7.9038e-04
Loss = 5.5619e-01, PNorm = 39.5835, GNorm = 1.0368, lr_0 = 7.9413e-04
Loss = 5.9694e-01, PNorm = 39.5997, GNorm = 1.7056, lr_0 = 7.9788e-04
Loss = 5.3215e-01, PNorm = 39.6190, GNorm = 1.3881, lr_0 = 8.0163e-04
Loss = 5.7241e-01, PNorm = 39.6336, GNorm = 1.3756, lr_0 = 8.0538e-04
Loss = 5.4391e-01, PNorm = 39.6533, GNorm = 6.8084, lr_0 = 8.0913e-04
Loss = 6.0624e-01, PNorm = 39.6608, GNorm = 1.3687, lr_0 = 8.1288e-04
Loss = 5.7918e-01, PNorm = 39.6790, GNorm = 1.2281, lr_0 = 8.1663e-04
Loss = 5.3884e-01, PNorm = 39.6902, GNorm = 1.7059, lr_0 = 8.2038e-04
Loss = 6.3669e-01, PNorm = 39.7020, GNorm = 1.6993, lr_0 = 8.2413e-04
Loss = 6.0249e-01, PNorm = 39.7171, GNorm = 3.5638, lr_0 = 8.2788e-04
Loss = 5.9967e-01, PNorm = 39.7377, GNorm = 1.9290, lr_0 = 8.3163e-04
Loss = 6.3184e-01, PNorm = 39.7520, GNorm = 1.3357, lr_0 = 8.3538e-04
Loss = 5.9200e-01, PNorm = 39.7746, GNorm = 3.5231, lr_0 = 8.3913e-04
Loss = 5.2985e-01, PNorm = 39.7928, GNorm = 2.4353, lr_0 = 8.4288e-04
Loss = 6.4610e-01, PNorm = 39.8020, GNorm = 2.1856, lr_0 = 8.4663e-04
Loss = 6.0018e-01, PNorm = 39.8133, GNorm = 1.6775, lr_0 = 8.5038e-04
Loss = 5.6533e-01, PNorm = 39.8293, GNorm = 1.4642, lr_0 = 8.5413e-04
Loss = 6.3933e-01, PNorm = 39.8422, GNorm = 1.4594, lr_0 = 8.5788e-04
Loss = 5.8155e-01, PNorm = 39.8545, GNorm = 1.2804, lr_0 = 8.6163e-04
Loss = 5.7051e-01, PNorm = 39.8712, GNorm = 1.7369, lr_0 = 8.6538e-04
Loss = 5.5734e-01, PNorm = 39.8852, GNorm = 1.6260, lr_0 = 8.6913e-04
Loss = 6.6971e-01, PNorm = 39.9026, GNorm = 2.1044, lr_0 = 8.7288e-04
Loss = 6.6262e-01, PNorm = 39.9272, GNorm = 1.8460, lr_0 = 8.7663e-04
Loss = 6.3344e-01, PNorm = 39.9553, GNorm = 2.8624, lr_0 = 8.8038e-04
Loss = 5.3774e-01, PNorm = 39.9821, GNorm = 1.2990, lr_0 = 8.8413e-04
Loss = 5.6248e-01, PNorm = 40.0027, GNorm = 1.4569, lr_0 = 8.8788e-04
Loss = 5.3225e-01, PNorm = 40.0201, GNorm = 6.2110, lr_0 = 8.9163e-04
Loss = 5.8045e-01, PNorm = 40.0357, GNorm = 5.4550, lr_0 = 8.9538e-04
Loss = 5.9689e-01, PNorm = 40.0525, GNorm = 2.6932, lr_0 = 8.9913e-04
Loss = 4.7149e-01, PNorm = 40.0677, GNorm = 3.6182, lr_0 = 9.0288e-04
Loss = 6.4099e-01, PNorm = 40.0890, GNorm = 2.7063, lr_0 = 9.0663e-04
Loss = 5.6720e-01, PNorm = 40.1056, GNorm = 4.3519, lr_0 = 9.1038e-04
Loss = 5.9165e-01, PNorm = 40.1218, GNorm = 3.5240, lr_0 = 9.1413e-04
Loss = 6.8862e-01, PNorm = 40.1454, GNorm = 5.9496, lr_0 = 9.1788e-04
Loss = 4.9882e-01, PNorm = 40.1618, GNorm = 2.4631, lr_0 = 9.2163e-04
Loss = 5.4519e-01, PNorm = 40.1816, GNorm = 2.9210, lr_0 = 9.2538e-04
Loss = 5.4205e-01, PNorm = 40.1931, GNorm = 1.8048, lr_0 = 9.2913e-04
Loss = 5.6940e-01, PNorm = 40.2091, GNorm = 3.5765, lr_0 = 9.3288e-04
Loss = 5.7563e-01, PNorm = 40.2311, GNorm = 2.6629, lr_0 = 9.3663e-04
Loss = 5.2726e-01, PNorm = 40.2475, GNorm = 1.8749, lr_0 = 9.4038e-04
Loss = 5.9020e-01, PNorm = 40.2632, GNorm = 3.1073, lr_0 = 9.4413e-04
Loss = 5.0509e-01, PNorm = 40.2768, GNorm = 3.1885, lr_0 = 9.4788e-04
Loss = 5.5046e-01, PNorm = 40.2850, GNorm = 3.4487, lr_0 = 9.5163e-04
Loss = 5.0278e-01, PNorm = 40.3056, GNorm = 1.3996, lr_0 = 9.5538e-04
Loss = 5.5863e-01, PNorm = 40.3279, GNorm = 1.1396, lr_0 = 9.5913e-04
Loss = 6.6651e-01, PNorm = 40.3438, GNorm = 4.4331, lr_0 = 9.6288e-04
Loss = 5.5831e-01, PNorm = 40.3758, GNorm = 0.7819, lr_0 = 9.6663e-04
Loss = 5.4607e-01, PNorm = 40.4027, GNorm = 4.8794, lr_0 = 9.7038e-04
Loss = 5.6718e-01, PNorm = 40.4262, GNorm = 4.0073, lr_0 = 9.7413e-04
Loss = 6.0954e-01, PNorm = 40.4480, GNorm = 4.3251, lr_0 = 9.7788e-04
Loss = 5.7728e-01, PNorm = 40.4681, GNorm = 1.1766, lr_0 = 9.8163e-04
Loss = 6.0921e-01, PNorm = 40.4833, GNorm = 1.8971, lr_0 = 9.8537e-04
Loss = 5.4932e-01, PNorm = 40.5045, GNorm = 2.1789, lr_0 = 9.8912e-04
Loss = 5.7245e-01, PNorm = 40.5322, GNorm = 1.2855, lr_0 = 9.9288e-04
Loss = 5.8692e-01, PNorm = 40.5530, GNorm = 1.4102, lr_0 = 9.9663e-04
Loss = 6.0304e-01, PNorm = 40.5628, GNorm = 1.2206, lr_0 = 9.9993e-04
Validation mae = 0.130677
Epoch 2
Loss = 5.8675e-01, PNorm = 40.5807, GNorm = 1.7287, lr_0 = 9.9925e-04
Loss = 5.7779e-01, PNorm = 40.6009, GNorm = 1.2892, lr_0 = 9.9856e-04
Loss = 5.7068e-01, PNorm = 40.6150, GNorm = 3.6247, lr_0 = 9.9788e-04
Loss = 5.9863e-01, PNorm = 40.6403, GNorm = 1.3379, lr_0 = 9.9719e-04
Loss = 5.2034e-01, PNorm = 40.6630, GNorm = 1.8174, lr_0 = 9.9651e-04
Loss = 4.9212e-01, PNorm = 40.6882, GNorm = 4.6959, lr_0 = 9.9583e-04
Loss = 4.9409e-01, PNorm = 40.7118, GNorm = 0.8230, lr_0 = 9.9515e-04
Loss = 4.8676e-01, PNorm = 40.7338, GNorm = 3.1072, lr_0 = 9.9446e-04
Loss = 5.4412e-01, PNorm = 40.7455, GNorm = 1.3797, lr_0 = 9.9378e-04
Loss = 6.0482e-01, PNorm = 40.7650, GNorm = 1.2635, lr_0 = 9.9310e-04
Loss = 5.1094e-01, PNorm = 40.7929, GNorm = 2.2609, lr_0 = 9.9242e-04
Loss = 5.1003e-01, PNorm = 40.8115, GNorm = 1.5479, lr_0 = 9.9174e-04
Loss = 4.8167e-01, PNorm = 40.8296, GNorm = 2.0283, lr_0 = 9.9106e-04
Loss = 5.7013e-01, PNorm = 40.8533, GNorm = 2.0125, lr_0 = 9.9038e-04
Loss = 6.2547e-01, PNorm = 40.8870, GNorm = 1.9851, lr_0 = 9.8971e-04
Loss = 5.6529e-01, PNorm = 40.9179, GNorm = 3.9466, lr_0 = 9.8903e-04
Loss = 6.0849e-01, PNorm = 40.9423, GNorm = 2.8254, lr_0 = 9.8835e-04
Loss = 6.0356e-01, PNorm = 40.9760, GNorm = 2.0406, lr_0 = 9.8767e-04
Loss = 5.9189e-01, PNorm = 41.0175, GNorm = 2.6386, lr_0 = 9.8700e-04
Loss = 5.5748e-01, PNorm = 41.0404, GNorm = 3.9519, lr_0 = 9.8632e-04
Loss = 6.3430e-01, PNorm = 41.0614, GNorm = 2.1959, lr_0 = 9.8564e-04
Loss = 4.6121e-01, PNorm = 41.0872, GNorm = 1.5834, lr_0 = 9.8497e-04
Loss = 5.6560e-01, PNorm = 41.1058, GNorm = 1.9522, lr_0 = 9.8429e-04
Loss = 5.1966e-01, PNorm = 41.1221, GNorm = 1.3399, lr_0 = 9.8362e-04
Loss = 5.5383e-01, PNorm = 41.1427, GNorm = 1.8201, lr_0 = 9.8295e-04
Loss = 5.9214e-01, PNorm = 41.1711, GNorm = 2.4510, lr_0 = 9.8227e-04
Loss = 5.4588e-01, PNorm = 41.1958, GNorm = 1.6493, lr_0 = 9.8160e-04
Loss = 5.3750e-01, PNorm = 41.2199, GNorm = 3.8522, lr_0 = 9.8093e-04
Loss = 5.9323e-01, PNorm = 41.2321, GNorm = 2.3312, lr_0 = 9.8026e-04
Loss = 5.8713e-01, PNorm = 41.2620, GNorm = 1.4358, lr_0 = 9.7958e-04
Loss = 5.3465e-01, PNorm = 41.2948, GNorm = 0.8935, lr_0 = 9.7891e-04
Loss = 5.7094e-01, PNorm = 41.3162, GNorm = 1.9626, lr_0 = 9.7824e-04
Loss = 5.0063e-01, PNorm = 41.3372, GNorm = 1.3072, lr_0 = 9.7757e-04
Loss = 5.5195e-01, PNorm = 41.3488, GNorm = 3.2533, lr_0 = 9.7690e-04
Loss = 6.2484e-01, PNorm = 41.3591, GNorm = 2.2287, lr_0 = 9.7623e-04
Loss = 5.3555e-01, PNorm = 41.3824, GNorm = 0.8928, lr_0 = 9.7556e-04
Loss = 6.2954e-01, PNorm = 41.4120, GNorm = 1.0850, lr_0 = 9.7490e-04
Loss = 5.3127e-01, PNorm = 41.4399, GNorm = 1.9670, lr_0 = 9.7423e-04
Loss = 5.1581e-01, PNorm = 41.4574, GNorm = 1.2528, lr_0 = 9.7356e-04
Loss = 5.1683e-01, PNorm = 41.4759, GNorm = 1.5945, lr_0 = 9.7289e-04
Loss = 4.9236e-01, PNorm = 41.4976, GNorm = 1.3036, lr_0 = 9.7223e-04
Loss = 5.8595e-01, PNorm = 41.5161, GNorm = 1.6711, lr_0 = 9.7156e-04
Loss = 5.7854e-01, PNorm = 41.5361, GNorm = 1.3091, lr_0 = 9.7090e-04
Loss = 5.2005e-01, PNorm = 41.5552, GNorm = 2.6794, lr_0 = 9.7023e-04
Loss = 5.3534e-01, PNorm = 41.5793, GNorm = 1.1736, lr_0 = 9.6957e-04
Loss = 5.8157e-01, PNorm = 41.6055, GNorm = 1.2791, lr_0 = 9.6890e-04
Loss = 5.9694e-01, PNorm = 41.6353, GNorm = 1.5920, lr_0 = 9.6824e-04
Loss = 5.3888e-01, PNorm = 41.6610, GNorm = 0.9941, lr_0 = 9.6757e-04
Loss = 5.0268e-01, PNorm = 41.6763, GNorm = 1.7683, lr_0 = 9.6691e-04
Loss = 5.7696e-01, PNorm = 41.6960, GNorm = 4.2440, lr_0 = 9.6625e-04
Loss = 6.0656e-01, PNorm = 41.7256, GNorm = 2.2628, lr_0 = 9.6559e-04
Loss = 5.2456e-01, PNorm = 41.7565, GNorm = 0.8483, lr_0 = 9.6493e-04
Loss = 5.3484e-01, PNorm = 41.7863, GNorm = 1.2775, lr_0 = 9.6427e-04
Loss = 5.0779e-01, PNorm = 41.8007, GNorm = 0.9962, lr_0 = 9.6360e-04
Loss = 5.7968e-01, PNorm = 41.8216, GNorm = 2.1847, lr_0 = 9.6294e-04
Loss = 5.8949e-01, PNorm = 41.8410, GNorm = 2.8467, lr_0 = 9.6228e-04
Loss = 6.2434e-01, PNorm = 41.8676, GNorm = 3.7624, lr_0 = 9.6163e-04
Loss = 4.3067e-01, PNorm = 41.8882, GNorm = 1.5609, lr_0 = 9.6097e-04
Loss = 5.2466e-01, PNorm = 41.9041, GNorm = 1.7464, lr_0 = 9.6031e-04
Loss = 5.3278e-01, PNorm = 41.9247, GNorm = 1.5761, lr_0 = 9.5965e-04
Loss = 4.9704e-01, PNorm = 41.9463, GNorm = 2.0843, lr_0 = 9.5899e-04
Loss = 6.2515e-01, PNorm = 41.9728, GNorm = 2.6793, lr_0 = 9.5834e-04
Loss = 6.1621e-01, PNorm = 42.0000, GNorm = 1.8485, lr_0 = 9.5768e-04
Loss = 5.6171e-01, PNorm = 42.0120, GNorm = 1.3550, lr_0 = 9.5702e-04
Loss = 5.4333e-01, PNorm = 42.0426, GNorm = 1.6992, lr_0 = 9.5637e-04
Loss = 5.0059e-01, PNorm = 42.0563, GNorm = 1.2894, lr_0 = 9.5571e-04
Loss = 5.2649e-01, PNorm = 42.0726, GNorm = 0.9644, lr_0 = 9.5506e-04
Loss = 5.2232e-01, PNorm = 42.0996, GNorm = 2.1136, lr_0 = 9.5440e-04
Loss = 5.1188e-01, PNorm = 42.1108, GNorm = 1.9271, lr_0 = 9.5375e-04
Loss = 5.6132e-01, PNorm = 42.1275, GNorm = 1.5742, lr_0 = 9.5310e-04
Loss = 5.0111e-01, PNorm = 42.1467, GNorm = 2.1685, lr_0 = 9.5244e-04
Loss = 5.9793e-01, PNorm = 42.1641, GNorm = 0.9023, lr_0 = 9.5179e-04
Loss = 4.8342e-01, PNorm = 42.1859, GNorm = 0.7779, lr_0 = 9.5114e-04
Loss = 5.9264e-01, PNorm = 42.2085, GNorm = 1.6587, lr_0 = 9.5049e-04
Loss = 5.4513e-01, PNorm = 42.2369, GNorm = 1.1101, lr_0 = 9.4984e-04
Loss = 5.8915e-01, PNorm = 42.2564, GNorm = 1.2637, lr_0 = 9.4919e-04
Loss = 5.2309e-01, PNorm = 42.2755, GNorm = 2.0513, lr_0 = 9.4854e-04
Loss = 5.7066e-01, PNorm = 42.2941, GNorm = 1.3151, lr_0 = 9.4789e-04
Loss = 5.4192e-01, PNorm = 42.3168, GNorm = 1.8548, lr_0 = 9.4724e-04
Loss = 5.0370e-01, PNorm = 42.3467, GNorm = 1.5707, lr_0 = 9.4659e-04
Loss = 5.0985e-01, PNorm = 42.3646, GNorm = 3.6941, lr_0 = 9.4594e-04
Loss = 5.3501e-01, PNorm = 42.3923, GNorm = 1.3096, lr_0 = 9.4529e-04
Loss = 4.8435e-01, PNorm = 42.4082, GNorm = 2.0270, lr_0 = 9.4464e-04
Loss = 5.0124e-01, PNorm = 42.4211, GNorm = 1.1784, lr_0 = 9.4400e-04
Loss = 5.4139e-01, PNorm = 42.4467, GNorm = 1.8097, lr_0 = 9.4335e-04
Loss = 5.6939e-01, PNorm = 42.4717, GNorm = 1.2451, lr_0 = 9.4270e-04
Loss = 5.3424e-01, PNorm = 42.4905, GNorm = 1.2222, lr_0 = 9.4206e-04
Loss = 5.1457e-01, PNorm = 42.5071, GNorm = 2.2125, lr_0 = 9.4141e-04
Loss = 5.4229e-01, PNorm = 42.5308, GNorm = 0.9781, lr_0 = 9.4077e-04
Loss = 5.5585e-01, PNorm = 42.5501, GNorm = 3.1611, lr_0 = 9.4012e-04
Loss = 5.4084e-01, PNorm = 42.5724, GNorm = 1.2334, lr_0 = 9.3948e-04
Loss = 5.4769e-01, PNorm = 42.5890, GNorm = 1.3459, lr_0 = 9.3884e-04
Loss = 4.9158e-01, PNorm = 42.6042, GNorm = 1.5610, lr_0 = 9.3819e-04
Loss = 5.2696e-01, PNorm = 42.6257, GNorm = 0.9353, lr_0 = 9.3755e-04
Loss = 5.1364e-01, PNorm = 42.6374, GNorm = 3.6480, lr_0 = 9.3691e-04
Loss = 4.7794e-01, PNorm = 42.6641, GNorm = 1.2600, lr_0 = 9.3627e-04
Loss = 4.9961e-01, PNorm = 42.6854, GNorm = 1.0528, lr_0 = 9.3562e-04
Loss = 5.4141e-01, PNorm = 42.7142, GNorm = 1.5258, lr_0 = 9.3498e-04
Loss = 5.5647e-01, PNorm = 42.7296, GNorm = 1.9372, lr_0 = 9.3434e-04
Loss = 5.2003e-01, PNorm = 42.7502, GNorm = 1.8655, lr_0 = 9.3370e-04
Loss = 5.5848e-01, PNorm = 42.7689, GNorm = 1.8885, lr_0 = 9.3306e-04
Loss = 5.3798e-01, PNorm = 42.7864, GNorm = 2.1431, lr_0 = 9.3242e-04
Loss = 5.3983e-01, PNorm = 42.7972, GNorm = 3.6829, lr_0 = 9.3178e-04
Loss = 5.0462e-01, PNorm = 42.8227, GNorm = 2.5352, lr_0 = 9.3115e-04
Loss = 5.2271e-01, PNorm = 42.8531, GNorm = 2.0064, lr_0 = 9.3051e-04
Loss = 5.5619e-01, PNorm = 42.8832, GNorm = 2.6251, lr_0 = 9.2987e-04
Loss = 6.1438e-01, PNorm = 42.9072, GNorm = 1.2874, lr_0 = 9.2923e-04
Loss = 5.5991e-01, PNorm = 42.9342, GNorm = 1.6624, lr_0 = 9.2860e-04
Loss = 5.6421e-01, PNorm = 42.9600, GNorm = 1.2193, lr_0 = 9.2796e-04
Loss = 5.1480e-01, PNorm = 42.9743, GNorm = 1.5741, lr_0 = 9.2733e-04
Loss = 5.2442e-01, PNorm = 42.9930, GNorm = 2.3542, lr_0 = 9.2669e-04
Loss = 6.1132e-01, PNorm = 43.0106, GNorm = 3.5797, lr_0 = 9.2606e-04
Loss = 5.5975e-01, PNorm = 43.0414, GNorm = 1.1571, lr_0 = 9.2542e-04
Loss = 4.8234e-01, PNorm = 43.0669, GNorm = 1.3290, lr_0 = 9.2479e-04
Loss = 5.1679e-01, PNorm = 43.0880, GNorm = 2.0101, lr_0 = 9.2415e-04
Loss = 5.1482e-01, PNorm = 43.1080, GNorm = 2.7671, lr_0 = 9.2352e-04
Loss = 5.4469e-01, PNorm = 43.1348, GNorm = 2.9734, lr_0 = 9.2289e-04
Loss = 5.5751e-01, PNorm = 43.1545, GNorm = 1.0630, lr_0 = 9.2226e-04
Loss = 5.6279e-01, PNorm = 43.1707, GNorm = 3.5212, lr_0 = 9.2162e-04
Loss = 5.1101e-01, PNorm = 43.1923, GNorm = 1.3171, lr_0 = 9.2099e-04
Validation mae = 0.128676
Epoch 3
Loss = 6.1152e-01, PNorm = 43.2013, GNorm = 1.1226, lr_0 = 9.2036e-04
Loss = 5.6635e-01, PNorm = 43.2198, GNorm = 2.2272, lr_0 = 9.1973e-04
Loss = 5.4817e-01, PNorm = 43.2364, GNorm = 1.0483, lr_0 = 9.1910e-04
Loss = 5.5945e-01, PNorm = 43.2563, GNorm = 2.0587, lr_0 = 9.1847e-04
Loss = 5.8416e-01, PNorm = 43.2731, GNorm = 2.9910, lr_0 = 9.1784e-04
Loss = 4.8696e-01, PNorm = 43.2933, GNorm = 1.8254, lr_0 = 9.1721e-04
Loss = 5.5425e-01, PNorm = 43.3126, GNorm = 4.2252, lr_0 = 9.1658e-04
Loss = 6.2476e-01, PNorm = 43.3401, GNorm = 1.0343, lr_0 = 9.1596e-04
Loss = 6.1666e-01, PNorm = 43.3753, GNorm = 2.2299, lr_0 = 9.1533e-04
Loss = 5.9932e-01, PNorm = 43.4152, GNorm = 1.1442, lr_0 = 9.1470e-04
Loss = 5.0614e-01, PNorm = 43.4375, GNorm = 1.6413, lr_0 = 9.1408e-04
Loss = 4.7748e-01, PNorm = 43.4540, GNorm = 1.2164, lr_0 = 9.1345e-04
Loss = 5.0668e-01, PNorm = 43.4703, GNorm = 3.2795, lr_0 = 9.1282e-04
Loss = 5.2088e-01, PNorm = 43.4943, GNorm = 1.0322, lr_0 = 9.1220e-04
Loss = 4.9437e-01, PNorm = 43.5077, GNorm = 1.5035, lr_0 = 9.1157e-04
Loss = 5.8004e-01, PNorm = 43.5222, GNorm = 0.8957, lr_0 = 9.1095e-04
Loss = 4.6604e-01, PNorm = 43.5409, GNorm = 1.1173, lr_0 = 9.1032e-04
Loss = 5.7022e-01, PNorm = 43.5567, GNorm = 1.6243, lr_0 = 9.0970e-04
Loss = 5.3564e-01, PNorm = 43.5739, GNorm = 1.8658, lr_0 = 9.0908e-04
Loss = 5.0275e-01, PNorm = 43.5944, GNorm = 1.2781, lr_0 = 9.0846e-04
Loss = 4.9478e-01, PNorm = 43.6212, GNorm = 1.4190, lr_0 = 9.0783e-04
Loss = 7.1858e-01, PNorm = 43.6394, GNorm = 1.5086, lr_0 = 9.0721e-04
Loss = 5.6607e-01, PNorm = 43.6759, GNorm = 1.7856, lr_0 = 9.0659e-04
Loss = 5.2349e-01, PNorm = 43.6985, GNorm = 2.8346, lr_0 = 9.0597e-04
Loss = 5.7948e-01, PNorm = 43.7302, GNorm = 3.6317, lr_0 = 9.0535e-04
Loss = 5.2151e-01, PNorm = 43.7548, GNorm = 2.1865, lr_0 = 9.0473e-04
Loss = 6.6256e-01, PNorm = 43.7776, GNorm = 2.0119, lr_0 = 9.0411e-04
Loss = 6.0857e-01, PNorm = 43.8070, GNorm = 1.3981, lr_0 = 9.0349e-04
Loss = 5.2925e-01, PNorm = 43.8259, GNorm = 1.9392, lr_0 = 9.0287e-04
Loss = 5.2770e-01, PNorm = 43.8500, GNorm = 1.8207, lr_0 = 9.0225e-04
Loss = 4.9614e-01, PNorm = 43.8689, GNorm = 1.0945, lr_0 = 9.0163e-04
Loss = 5.0078e-01, PNorm = 43.8811, GNorm = 1.3381, lr_0 = 9.0102e-04
Loss = 5.3785e-01, PNorm = 43.8984, GNorm = 1.7444, lr_0 = 9.0040e-04
Loss = 5.0159e-01, PNorm = 43.9091, GNorm = 1.6866, lr_0 = 8.9978e-04
Loss = 5.3102e-01, PNorm = 43.9324, GNorm = 1.8166, lr_0 = 8.9916e-04
Loss = 5.0706e-01, PNorm = 43.9582, GNorm = 1.0052, lr_0 = 8.9855e-04
Loss = 4.7447e-01, PNorm = 43.9821, GNorm = 1.2547, lr_0 = 8.9793e-04
Loss = 4.7420e-01, PNorm = 44.0035, GNorm = 2.0365, lr_0 = 8.9732e-04
Loss = 5.0898e-01, PNorm = 44.0204, GNorm = 2.1374, lr_0 = 8.9670e-04
Loss = 4.7933e-01, PNorm = 44.0357, GNorm = 1.1819, lr_0 = 8.9609e-04
Loss = 5.0679e-01, PNorm = 44.0522, GNorm = 1.0549, lr_0 = 8.9548e-04
Loss = 4.6412e-01, PNorm = 44.0714, GNorm = 1.0175, lr_0 = 8.9486e-04
Loss = 5.0939e-01, PNorm = 44.0912, GNorm = 2.2813, lr_0 = 8.9425e-04
Loss = 5.9066e-01, PNorm = 44.1080, GNorm = 2.0710, lr_0 = 8.9364e-04
Loss = 4.5538e-01, PNorm = 44.1239, GNorm = 1.5125, lr_0 = 8.9302e-04
Loss = 5.2212e-01, PNorm = 44.1368, GNorm = 1.2164, lr_0 = 8.9241e-04
Loss = 5.2111e-01, PNorm = 44.1519, GNorm = 1.3419, lr_0 = 8.9180e-04
Loss = 4.3755e-01, PNorm = 44.1738, GNorm = 1.7719, lr_0 = 8.9119e-04
Loss = 5.2504e-01, PNorm = 44.1899, GNorm = 1.6999, lr_0 = 8.9058e-04
Loss = 4.5545e-01, PNorm = 44.2151, GNorm = 1.3890, lr_0 = 8.8997e-04
Loss = 4.7883e-01, PNorm = 44.2355, GNorm = 1.7696, lr_0 = 8.8936e-04
Loss = 5.2890e-01, PNorm = 44.2594, GNorm = 1.2002, lr_0 = 8.8875e-04
Loss = 5.1163e-01, PNorm = 44.2808, GNorm = 3.1608, lr_0 = 8.8814e-04
Loss = 5.3536e-01, PNorm = 44.3045, GNorm = 1.3833, lr_0 = 8.8753e-04
Loss = 5.0636e-01, PNorm = 44.3260, GNorm = 1.9637, lr_0 = 8.8693e-04
Loss = 4.7405e-01, PNorm = 44.3452, GNorm = 1.1184, lr_0 = 8.8632e-04
Loss = 4.8182e-01, PNorm = 44.3586, GNorm = 1.6094, lr_0 = 8.8571e-04
Loss = 5.0336e-01, PNorm = 44.3770, GNorm = 2.1301, lr_0 = 8.8510e-04
Loss = 4.9426e-01, PNorm = 44.4015, GNorm = 1.3407, lr_0 = 8.8450e-04
Loss = 5.4131e-01, PNorm = 44.4181, GNorm = 1.6586, lr_0 = 8.8389e-04
Loss = 5.0312e-01, PNorm = 44.4352, GNorm = 1.6050, lr_0 = 8.8329e-04
Loss = 4.6252e-01, PNorm = 44.4473, GNorm = 0.9214, lr_0 = 8.8268e-04
Loss = 5.0023e-01, PNorm = 44.4617, GNorm = 1.7262, lr_0 = 8.8208e-04
Loss = 5.3516e-01, PNorm = 44.4794, GNorm = 1.6129, lr_0 = 8.8147e-04
Loss = 4.8623e-01, PNorm = 44.5005, GNorm = 1.6329, lr_0 = 8.8087e-04
Loss = 4.8937e-01, PNorm = 44.5181, GNorm = 2.3462, lr_0 = 8.8026e-04
Loss = 4.7585e-01, PNorm = 44.5377, GNorm = 1.0639, lr_0 = 8.7966e-04
Loss = 5.0490e-01, PNorm = 44.5573, GNorm = 1.1961, lr_0 = 8.7906e-04
Loss = 4.2679e-01, PNorm = 44.5756, GNorm = 1.0339, lr_0 = 8.7846e-04
Loss = 5.4826e-01, PNorm = 44.5905, GNorm = 1.1271, lr_0 = 8.7785e-04
Loss = 5.3070e-01, PNorm = 44.6066, GNorm = 1.2771, lr_0 = 8.7725e-04
Loss = 5.8877e-01, PNorm = 44.6261, GNorm = 2.1549, lr_0 = 8.7665e-04
Loss = 4.9588e-01, PNorm = 44.6436, GNorm = 1.8763, lr_0 = 8.7605e-04
Loss = 5.4982e-01, PNorm = 44.6705, GNorm = 1.6875, lr_0 = 8.7545e-04
Loss = 5.3077e-01, PNorm = 44.6913, GNorm = 1.9965, lr_0 = 8.7485e-04
Loss = 4.6654e-01, PNorm = 44.7143, GNorm = 0.8530, lr_0 = 8.7425e-04
Loss = 5.2649e-01, PNorm = 44.7258, GNorm = 1.3474, lr_0 = 8.7365e-04
Loss = 4.4576e-01, PNorm = 44.7439, GNorm = 1.1566, lr_0 = 8.7306e-04
Loss = 5.6228e-01, PNorm = 44.7582, GNorm = 2.1974, lr_0 = 8.7246e-04
Loss = 4.8876e-01, PNorm = 44.7655, GNorm = 2.0719, lr_0 = 8.7186e-04
Loss = 5.0251e-01, PNorm = 44.7731, GNorm = 2.3764, lr_0 = 8.7126e-04
Loss = 5.0326e-01, PNorm = 44.7948, GNorm = 3.1916, lr_0 = 8.7067e-04
Loss = 5.0404e-01, PNorm = 44.8085, GNorm = 1.1827, lr_0 = 8.7007e-04
Loss = 5.1728e-01, PNorm = 44.8295, GNorm = 1.3030, lr_0 = 8.6947e-04
Loss = 4.7334e-01, PNorm = 44.8545, GNorm = 1.1358, lr_0 = 8.6888e-04
Loss = 5.0392e-01, PNorm = 44.8775, GNorm = 2.4753, lr_0 = 8.6828e-04
Loss = 3.8012e-01, PNorm = 44.8911, GNorm = 1.8361, lr_0 = 8.6769e-04
Loss = 5.6791e-01, PNorm = 44.9117, GNorm = 4.0714, lr_0 = 8.6709e-04
Loss = 5.1895e-01, PNorm = 44.9332, GNorm = 1.3802, lr_0 = 8.6650e-04
Loss = 4.6214e-01, PNorm = 44.9565, GNorm = 1.6424, lr_0 = 8.6590e-04
Loss = 5.2940e-01, PNorm = 44.9632, GNorm = 1.7833, lr_0 = 8.6531e-04
Loss = 5.5247e-01, PNorm = 44.9765, GNorm = 1.3612, lr_0 = 8.6472e-04
Loss = 5.1626e-01, PNorm = 45.0008, GNorm = 2.3981, lr_0 = 8.6413e-04
Loss = 4.9937e-01, PNorm = 45.0173, GNorm = 1.7775, lr_0 = 8.6353e-04
Loss = 4.7913e-01, PNorm = 45.0336, GNorm = 1.1840, lr_0 = 8.6294e-04
Loss = 4.6880e-01, PNorm = 45.0534, GNorm = 2.4772, lr_0 = 8.6235e-04
Loss = 4.8582e-01, PNorm = 45.0748, GNorm = 0.8126, lr_0 = 8.6176e-04
Loss = 5.6928e-01, PNorm = 45.1017, GNorm = 3.8769, lr_0 = 8.6117e-04
Loss = 3.9109e-01, PNorm = 45.1175, GNorm = 0.8495, lr_0 = 8.6058e-04
Loss = 5.0484e-01, PNorm = 45.1334, GNorm = 2.0779, lr_0 = 8.5999e-04
Loss = 5.0953e-01, PNorm = 45.1435, GNorm = 2.3802, lr_0 = 8.5940e-04
Loss = 4.8125e-01, PNorm = 45.1633, GNorm = 2.5568, lr_0 = 8.5881e-04
Loss = 4.9107e-01, PNorm = 45.1871, GNorm = 1.1106, lr_0 = 8.5823e-04
Loss = 4.4413e-01, PNorm = 45.2086, GNorm = 0.8235, lr_0 = 8.5764e-04
Loss = 4.8309e-01, PNorm = 45.2289, GNorm = 2.1804, lr_0 = 8.5705e-04
Loss = 4.5536e-01, PNorm = 45.2494, GNorm = 1.5991, lr_0 = 8.5646e-04
Loss = 5.3788e-01, PNorm = 45.2625, GNorm = 0.7897, lr_0 = 8.5588e-04
Loss = 4.6504e-01, PNorm = 45.2754, GNorm = 0.9263, lr_0 = 8.5529e-04
Loss = 4.9294e-01, PNorm = 45.2875, GNorm = 2.0397, lr_0 = 8.5470e-04
Loss = 4.6024e-01, PNorm = 45.3063, GNorm = 1.2231, lr_0 = 8.5412e-04
Loss = 4.6241e-01, PNorm = 45.3210, GNorm = 0.9915, lr_0 = 8.5353e-04
Loss = 4.7548e-01, PNorm = 45.3340, GNorm = 1.7249, lr_0 = 8.5295e-04
Loss = 5.5423e-01, PNorm = 45.3483, GNorm = 1.8377, lr_0 = 8.5236e-04
Loss = 4.7283e-01, PNorm = 45.3585, GNorm = 1.0739, lr_0 = 8.5178e-04
Loss = 5.3021e-01, PNorm = 45.3733, GNorm = 1.1123, lr_0 = 8.5120e-04
Loss = 5.3271e-01, PNorm = 45.3909, GNorm = 1.2098, lr_0 = 8.5061e-04
Loss = 4.8501e-01, PNorm = 45.4071, GNorm = 1.2821, lr_0 = 8.5003e-04
Loss = 4.7107e-01, PNorm = 45.4216, GNorm = 3.0269, lr_0 = 8.4945e-04
Loss = 5.6167e-01, PNorm = 45.4464, GNorm = 3.7709, lr_0 = 8.4887e-04
Loss = 4.3836e-01, PNorm = 45.4773, GNorm = 1.4130, lr_0 = 8.4828e-04
Validation mae = 0.124994
Epoch 4
Loss = 4.9656e-01, PNorm = 45.5049, GNorm = 2.6339, lr_0 = 8.4770e-04
Loss = 4.9898e-01, PNorm = 45.5259, GNorm = 1.2786, lr_0 = 8.4712e-04
Loss = 5.4530e-01, PNorm = 45.5488, GNorm = 1.9558, lr_0 = 8.4654e-04
Loss = 5.3051e-01, PNorm = 45.5743, GNorm = 2.2105, lr_0 = 8.4596e-04
Loss = 5.3030e-01, PNorm = 45.5851, GNorm = 2.0851, lr_0 = 8.4538e-04
Loss = 5.4721e-01, PNorm = 45.6037, GNorm = 1.1091, lr_0 = 8.4480e-04
Loss = 4.6587e-01, PNorm = 45.6265, GNorm = 1.1262, lr_0 = 8.4423e-04
Loss = 5.0206e-01, PNorm = 45.6461, GNorm = 2.0177, lr_0 = 8.4365e-04
Loss = 4.8011e-01, PNorm = 45.6668, GNorm = 1.2502, lr_0 = 8.4307e-04
Loss = 4.9774e-01, PNorm = 45.6849, GNorm = 1.3171, lr_0 = 8.4249e-04
Loss = 4.4929e-01, PNorm = 45.7064, GNorm = 2.9855, lr_0 = 8.4191e-04
Loss = 5.1493e-01, PNorm = 45.7235, GNorm = 2.3199, lr_0 = 8.4134e-04
Loss = 4.5194e-01, PNorm = 45.7402, GNorm = 1.1584, lr_0 = 8.4076e-04
Loss = 4.9235e-01, PNorm = 45.7578, GNorm = 1.2275, lr_0 = 8.4019e-04
Loss = 5.1349e-01, PNorm = 45.7762, GNorm = 1.4433, lr_0 = 8.3961e-04
Loss = 5.1404e-01, PNorm = 45.8068, GNorm = 1.9955, lr_0 = 8.3903e-04
Loss = 5.1700e-01, PNorm = 45.8313, GNorm = 1.1566, lr_0 = 8.3846e-04
Loss = 4.7222e-01, PNorm = 45.8572, GNorm = 2.0023, lr_0 = 8.3789e-04
Loss = 5.4813e-01, PNorm = 45.8793, GNorm = 1.7510, lr_0 = 8.3731e-04
Loss = 4.8244e-01, PNorm = 45.9030, GNorm = 2.5450, lr_0 = 8.3674e-04
Loss = 4.7981e-01, PNorm = 45.9239, GNorm = 1.5548, lr_0 = 8.3616e-04
Loss = 4.8930e-01, PNorm = 45.9411, GNorm = 0.9533, lr_0 = 8.3559e-04
Loss = 5.2487e-01, PNorm = 45.9542, GNorm = 2.0157, lr_0 = 8.3502e-04
Loss = 5.2380e-01, PNorm = 45.9699, GNorm = 0.9753, lr_0 = 8.3445e-04
Loss = 4.7221e-01, PNorm = 45.9868, GNorm = 2.4451, lr_0 = 8.3388e-04
Loss = 4.5469e-01, PNorm = 46.0143, GNorm = 1.5413, lr_0 = 8.3330e-04
Loss = 4.8046e-01, PNorm = 46.0410, GNorm = 2.6482, lr_0 = 8.3273e-04
Loss = 4.6924e-01, PNorm = 46.0547, GNorm = 3.6049, lr_0 = 8.3216e-04
Loss = 4.7884e-01, PNorm = 46.0747, GNorm = 1.6789, lr_0 = 8.3159e-04
Loss = 5.5880e-01, PNorm = 46.0992, GNorm = 4.4327, lr_0 = 8.3102e-04
Loss = 5.0884e-01, PNorm = 46.1068, GNorm = 2.0508, lr_0 = 8.3045e-04
Loss = 4.6993e-01, PNorm = 46.1285, GNorm = 1.2487, lr_0 = 8.2988e-04
Loss = 6.1928e-01, PNorm = 46.1465, GNorm = 1.8493, lr_0 = 8.2932e-04
Loss = 5.0958e-01, PNorm = 46.1624, GNorm = 1.4734, lr_0 = 8.2875e-04
Loss = 4.3150e-01, PNorm = 46.1673, GNorm = 1.4552, lr_0 = 8.2818e-04
Loss = 4.9720e-01, PNorm = 46.1885, GNorm = 2.3255, lr_0 = 8.2761e-04
Loss = 5.2771e-01, PNorm = 46.2162, GNorm = 1.5232, lr_0 = 8.2705e-04
Loss = 5.4903e-01, PNorm = 46.2379, GNorm = 1.4064, lr_0 = 8.2648e-04
Loss = 5.0401e-01, PNorm = 46.2506, GNorm = 1.2830, lr_0 = 8.2591e-04
Loss = 4.7634e-01, PNorm = 46.2633, GNorm = 1.1989, lr_0 = 8.2535e-04
Loss = 4.3922e-01, PNorm = 46.2818, GNorm = 1.4790, lr_0 = 8.2478e-04
Loss = 4.9603e-01, PNorm = 46.2999, GNorm = 2.0967, lr_0 = 8.2422e-04
Loss = 5.5353e-01, PNorm = 46.3191, GNorm = 1.5822, lr_0 = 8.2365e-04
Loss = 4.7018e-01, PNorm = 46.3429, GNorm = 1.9214, lr_0 = 8.2309e-04
Loss = 4.2421e-01, PNorm = 46.3512, GNorm = 1.1874, lr_0 = 8.2252e-04
Loss = 5.2601e-01, PNorm = 46.3679, GNorm = 1.7353, lr_0 = 8.2196e-04
Loss = 4.4188e-01, PNorm = 46.3919, GNorm = 1.8260, lr_0 = 8.2140e-04
Loss = 4.9779e-01, PNorm = 46.4057, GNorm = 3.7931, lr_0 = 8.2084e-04
Loss = 5.1414e-01, PNorm = 46.4207, GNorm = 1.8122, lr_0 = 8.2027e-04
Loss = 4.9791e-01, PNorm = 46.4429, GNorm = 0.9564, lr_0 = 8.1971e-04
Loss = 4.7141e-01, PNorm = 46.4615, GNorm = 2.8436, lr_0 = 8.1915e-04
Loss = 5.0916e-01, PNorm = 46.4826, GNorm = 1.2347, lr_0 = 8.1859e-04
Loss = 4.3671e-01, PNorm = 46.4947, GNorm = 0.9782, lr_0 = 8.1803e-04
Loss = 4.2691e-01, PNorm = 46.5095, GNorm = 1.9409, lr_0 = 8.1747e-04
Loss = 5.2971e-01, PNorm = 46.5282, GNorm = 1.3865, lr_0 = 8.1691e-04
Loss = 5.4417e-01, PNorm = 46.5539, GNorm = 1.1698, lr_0 = 8.1635e-04
Loss = 4.5703e-01, PNorm = 46.5723, GNorm = 3.8444, lr_0 = 8.1579e-04
Loss = 4.9677e-01, PNorm = 46.5929, GNorm = 1.4882, lr_0 = 8.1523e-04
Loss = 4.4189e-01, PNorm = 46.6162, GNorm = 1.0528, lr_0 = 8.1467e-04
Loss = 5.1434e-01, PNorm = 46.6381, GNorm = 1.1644, lr_0 = 8.1411e-04
Loss = 5.1802e-01, PNorm = 46.6487, GNorm = 0.9032, lr_0 = 8.1355e-04
Loss = 4.7524e-01, PNorm = 46.6575, GNorm = 1.2611, lr_0 = 8.1300e-04
Loss = 4.8281e-01, PNorm = 46.6714, GNorm = 1.0186, lr_0 = 8.1244e-04
Loss = 5.1546e-01, PNorm = 46.6929, GNorm = 1.5659, lr_0 = 8.1188e-04
Loss = 4.8412e-01, PNorm = 46.7178, GNorm = 0.9763, lr_0 = 8.1133e-04
Loss = 4.6153e-01, PNorm = 46.7405, GNorm = 1.0657, lr_0 = 8.1077e-04
Loss = 4.4024e-01, PNorm = 46.7519, GNorm = 1.8196, lr_0 = 8.1022e-04
Loss = 4.4819e-01, PNorm = 46.7718, GNorm = 1.7422, lr_0 = 8.0966e-04
Loss = 4.9629e-01, PNorm = 46.7810, GNorm = 1.5610, lr_0 = 8.0911e-04
Loss = 5.1856e-01, PNorm = 46.8075, GNorm = 1.7338, lr_0 = 8.0855e-04
Loss = 4.7224e-01, PNorm = 46.8302, GNorm = 2.0314, lr_0 = 8.0800e-04
Loss = 4.5912e-01, PNorm = 46.8504, GNorm = 1.9622, lr_0 = 8.0745e-04
Loss = 4.6806e-01, PNorm = 46.8650, GNorm = 1.4203, lr_0 = 8.0689e-04
Loss = 4.0776e-01, PNorm = 46.8716, GNorm = 2.0049, lr_0 = 8.0634e-04
Loss = 4.9477e-01, PNorm = 46.8843, GNorm = 1.4426, lr_0 = 8.0579e-04
Loss = 4.6453e-01, PNorm = 46.8925, GNorm = 1.1672, lr_0 = 8.0523e-04
Loss = 4.6574e-01, PNorm = 46.9056, GNorm = 1.2765, lr_0 = 8.0468e-04
Loss = 4.4781e-01, PNorm = 46.9270, GNorm = 1.6364, lr_0 = 8.0413e-04
Loss = 5.1207e-01, PNorm = 46.9426, GNorm = 1.4195, lr_0 = 8.0358e-04
Loss = 4.4440e-01, PNorm = 46.9645, GNorm = 2.2507, lr_0 = 8.0303e-04
Loss = 4.9863e-01, PNorm = 46.9912, GNorm = 1.6517, lr_0 = 8.0248e-04
Loss = 5.4238e-01, PNorm = 47.0218, GNorm = 2.1519, lr_0 = 8.0193e-04
Loss = 4.6161e-01, PNorm = 47.0535, GNorm = 1.9152, lr_0 = 8.0138e-04
Loss = 5.0125e-01, PNorm = 47.0689, GNorm = 1.1179, lr_0 = 8.0083e-04
Loss = 4.2394e-01, PNorm = 47.0903, GNorm = 1.3586, lr_0 = 8.0028e-04
Loss = 4.6076e-01, PNorm = 47.1040, GNorm = 1.9324, lr_0 = 7.9974e-04
Loss = 4.6980e-01, PNorm = 47.1177, GNorm = 2.4768, lr_0 = 7.9919e-04
Loss = 5.3782e-01, PNorm = 47.1261, GNorm = 1.6265, lr_0 = 7.9864e-04
Loss = 4.7890e-01, PNorm = 47.1478, GNorm = 1.2237, lr_0 = 7.9809e-04
Loss = 5.2067e-01, PNorm = 47.1779, GNorm = 1.2611, lr_0 = 7.9755e-04
Loss = 5.0892e-01, PNorm = 47.1953, GNorm = 1.1629, lr_0 = 7.9700e-04
Loss = 4.9261e-01, PNorm = 47.2109, GNorm = 1.3171, lr_0 = 7.9645e-04
Loss = 4.7243e-01, PNorm = 47.2210, GNorm = 1.2263, lr_0 = 7.9591e-04
Loss = 4.7079e-01, PNorm = 47.2375, GNorm = 1.0248, lr_0 = 7.9536e-04
Loss = 4.2734e-01, PNorm = 47.2625, GNorm = 1.1676, lr_0 = 7.9482e-04
Loss = 4.5538e-01, PNorm = 47.2801, GNorm = 1.1649, lr_0 = 7.9427e-04
Loss = 4.4749e-01, PNorm = 47.2965, GNorm = 1.1710, lr_0 = 7.9373e-04
Loss = 5.3032e-01, PNorm = 47.3192, GNorm = 1.0115, lr_0 = 7.9319e-04
Loss = 4.5930e-01, PNorm = 47.3458, GNorm = 0.9141, lr_0 = 7.9264e-04
Loss = 5.1055e-01, PNorm = 47.3609, GNorm = 1.3525, lr_0 = 7.9210e-04
Loss = 4.5760e-01, PNorm = 47.3724, GNorm = 1.4808, lr_0 = 7.9156e-04
Loss = 4.5031e-01, PNorm = 47.3872, GNorm = 1.1687, lr_0 = 7.9101e-04
Loss = 4.6932e-01, PNorm = 47.4092, GNorm = 1.2429, lr_0 = 7.9047e-04
Loss = 5.6026e-01, PNorm = 47.4315, GNorm = 1.1010, lr_0 = 7.8993e-04
Loss = 4.8944e-01, PNorm = 47.4547, GNorm = 2.5057, lr_0 = 7.8939e-04
Loss = 5.6223e-01, PNorm = 47.4751, GNorm = 2.6859, lr_0 = 7.8885e-04
Loss = 4.9594e-01, PNorm = 47.4907, GNorm = 1.2424, lr_0 = 7.8831e-04
Loss = 4.2683e-01, PNorm = 47.5078, GNorm = 1.1301, lr_0 = 7.8777e-04
Loss = 5.1305e-01, PNorm = 47.5293, GNorm = 2.0990, lr_0 = 7.8723e-04
Loss = 5.3144e-01, PNorm = 47.5506, GNorm = 1.9115, lr_0 = 7.8669e-04
Loss = 5.5657e-01, PNorm = 47.5849, GNorm = 1.2942, lr_0 = 7.8615e-04
Loss = 4.9169e-01, PNorm = 47.6070, GNorm = 1.5754, lr_0 = 7.8561e-04
Loss = 4.1343e-01, PNorm = 47.6244, GNorm = 0.8249, lr_0 = 7.8507e-04
Loss = 5.0073e-01, PNorm = 47.6410, GNorm = 1.3261, lr_0 = 7.8454e-04
Loss = 5.0500e-01, PNorm = 47.6599, GNorm = 2.8325, lr_0 = 7.8400e-04
Loss = 5.3031e-01, PNorm = 47.6798, GNorm = 1.3059, lr_0 = 7.8346e-04
Loss = 4.8864e-01, PNorm = 47.7056, GNorm = 2.1561, lr_0 = 7.8293e-04
Loss = 4.0019e-01, PNorm = 47.7288, GNorm = 1.5581, lr_0 = 7.8239e-04
Loss = 5.0941e-01, PNorm = 47.7496, GNorm = 1.4753, lr_0 = 7.8185e-04
Loss = 5.2893e-01, PNorm = 47.7711, GNorm = 1.1164, lr_0 = 7.8132e-04
Validation mae = 0.119895
Epoch 5
Loss = 4.2941e-01, PNorm = 47.7889, GNorm = 1.4727, lr_0 = 7.8078e-04
Loss = 4.6064e-01, PNorm = 47.7961, GNorm = 1.5065, lr_0 = 7.8025e-04
Loss = 4.6064e-01, PNorm = 47.8099, GNorm = 1.7982, lr_0 = 7.7971e-04
Loss = 4.2023e-01, PNorm = 47.8269, GNorm = 1.2892, lr_0 = 7.7918e-04
Loss = 4.9657e-01, PNorm = 47.8480, GNorm = 1.2075, lr_0 = 7.7864e-04
Loss = 4.0876e-01, PNorm = 47.8714, GNorm = 1.6350, lr_0 = 7.7811e-04
Loss = 5.1910e-01, PNorm = 47.8891, GNorm = 1.9918, lr_0 = 7.7758e-04
Loss = 4.5833e-01, PNorm = 47.9127, GNorm = 1.3253, lr_0 = 7.7705e-04
Loss = 4.0872e-01, PNorm = 47.9323, GNorm = 2.8434, lr_0 = 7.7651e-04
Loss = 5.1736e-01, PNorm = 47.9579, GNorm = 1.3466, lr_0 = 7.7598e-04
Loss = 4.9247e-01, PNorm = 47.9784, GNorm = 3.2374, lr_0 = 7.7545e-04
Loss = 5.1001e-01, PNorm = 48.0100, GNorm = 1.2517, lr_0 = 7.7492e-04
Loss = 4.4717e-01, PNorm = 48.0284, GNorm = 1.8045, lr_0 = 7.7439e-04
Loss = 5.3015e-01, PNorm = 48.0472, GNorm = 1.5823, lr_0 = 7.7386e-04
Loss = 4.9623e-01, PNorm = 48.0676, GNorm = 2.3966, lr_0 = 7.7333e-04
Loss = 5.0633e-01, PNorm = 48.0927, GNorm = 1.0768, lr_0 = 7.7280e-04
Loss = 5.8229e-01, PNorm = 48.1117, GNorm = 1.9159, lr_0 = 7.7227e-04
Loss = 4.8464e-01, PNorm = 48.1400, GNorm = 1.0128, lr_0 = 7.7174e-04
Loss = 4.8278e-01, PNorm = 48.1687, GNorm = 1.4906, lr_0 = 7.7121e-04
Loss = 4.9339e-01, PNorm = 48.1954, GNorm = 1.8993, lr_0 = 7.7068e-04
Loss = 4.7812e-01, PNorm = 48.2207, GNorm = 1.6494, lr_0 = 7.7015e-04
Loss = 4.9980e-01, PNorm = 48.2312, GNorm = 2.0835, lr_0 = 7.6963e-04
Loss = 4.5357e-01, PNorm = 48.2524, GNorm = 1.2107, lr_0 = 7.6910e-04
Loss = 5.1208e-01, PNorm = 48.2685, GNorm = 0.7370, lr_0 = 7.6857e-04
Loss = 4.2350e-01, PNorm = 48.2834, GNorm = 1.2140, lr_0 = 7.6805e-04
Loss = 4.6754e-01, PNorm = 48.2990, GNorm = 3.2616, lr_0 = 7.6752e-04
Loss = 4.4116e-01, PNorm = 48.3185, GNorm = 1.9189, lr_0 = 7.6699e-04
Loss = 4.6242e-01, PNorm = 48.3454, GNorm = 1.2434, lr_0 = 7.6647e-04
Loss = 4.4346e-01, PNorm = 48.3642, GNorm = 1.0372, lr_0 = 7.6594e-04
Loss = 4.8384e-01, PNorm = 48.3799, GNorm = 1.5829, lr_0 = 7.6542e-04
Loss = 5.2609e-01, PNorm = 48.3988, GNorm = 1.1026, lr_0 = 7.6489e-04
Loss = 4.7414e-01, PNorm = 48.4149, GNorm = 1.1056, lr_0 = 7.6437e-04
Loss = 3.9701e-01, PNorm = 48.4287, GNorm = 1.1509, lr_0 = 7.6385e-04
Loss = 5.3366e-01, PNorm = 48.4493, GNorm = 1.5952, lr_0 = 7.6332e-04
Loss = 4.7964e-01, PNorm = 48.4684, GNorm = 1.6242, lr_0 = 7.6280e-04
Loss = 4.3857e-01, PNorm = 48.4795, GNorm = 1.4135, lr_0 = 7.6228e-04
Loss = 4.8715e-01, PNorm = 48.4935, GNorm = 1.6560, lr_0 = 7.6176e-04
Loss = 5.4193e-01, PNorm = 48.5140, GNorm = 1.3157, lr_0 = 7.6123e-04
Loss = 5.1852e-01, PNorm = 48.5293, GNorm = 1.1702, lr_0 = 7.6071e-04
Loss = 4.6023e-01, PNorm = 48.5551, GNorm = 1.1082, lr_0 = 7.6019e-04
Loss = 5.3420e-01, PNorm = 48.5765, GNorm = 3.7246, lr_0 = 7.5967e-04
Loss = 5.0582e-01, PNorm = 48.5980, GNorm = 0.9657, lr_0 = 7.5915e-04
Loss = 4.6770e-01, PNorm = 48.6198, GNorm = 0.9298, lr_0 = 7.5863e-04
Loss = 4.1418e-01, PNorm = 48.6360, GNorm = 1.0653, lr_0 = 7.5811e-04
Loss = 5.1463e-01, PNorm = 48.6535, GNorm = 1.7899, lr_0 = 7.5759e-04
Loss = 4.8390e-01, PNorm = 48.6693, GNorm = 2.5119, lr_0 = 7.5707e-04
Loss = 4.2710e-01, PNorm = 48.6852, GNorm = 1.5176, lr_0 = 7.5655e-04
Loss = 4.4050e-01, PNorm = 48.7025, GNorm = 3.0474, lr_0 = 7.5603e-04
Loss = 5.1607e-01, PNorm = 48.7181, GNorm = 1.3478, lr_0 = 7.5552e-04
Loss = 4.5837e-01, PNorm = 48.7410, GNorm = 1.5186, lr_0 = 7.5500e-04
Loss = 4.3051e-01, PNorm = 48.7568, GNorm = 1.0077, lr_0 = 7.5448e-04
Loss = 4.0974e-01, PNorm = 48.7627, GNorm = 1.2057, lr_0 = 7.5397e-04
Loss = 4.6172e-01, PNorm = 48.7809, GNorm = 1.2526, lr_0 = 7.5345e-04
Loss = 4.2898e-01, PNorm = 48.7970, GNorm = 1.2101, lr_0 = 7.5293e-04
Loss = 4.7556e-01, PNorm = 48.8056, GNorm = 1.3793, lr_0 = 7.5242e-04
Loss = 4.7057e-01, PNorm = 48.8200, GNorm = 1.2212, lr_0 = 7.5190e-04
Loss = 4.7841e-01, PNorm = 48.8400, GNorm = 1.1313, lr_0 = 7.5139e-04
Loss = 4.7959e-01, PNorm = 48.8510, GNorm = 1.0628, lr_0 = 7.5087e-04
Loss = 4.1671e-01, PNorm = 48.8703, GNorm = 1.1959, lr_0 = 7.5036e-04
Loss = 4.5364e-01, PNorm = 48.8877, GNorm = 1.3112, lr_0 = 7.4984e-04
Loss = 4.1017e-01, PNorm = 48.9069, GNorm = 0.8181, lr_0 = 7.4933e-04
Loss = 4.5779e-01, PNorm = 48.9182, GNorm = 1.2862, lr_0 = 7.4882e-04
Loss = 4.4319e-01, PNorm = 48.9324, GNorm = 1.4410, lr_0 = 7.4830e-04
Loss = 4.9863e-01, PNorm = 48.9486, GNorm = 1.4312, lr_0 = 7.4779e-04
Loss = 4.2616e-01, PNorm = 48.9623, GNorm = 1.5385, lr_0 = 7.4728e-04
Loss = 4.7578e-01, PNorm = 48.9777, GNorm = 1.2807, lr_0 = 7.4677e-04
Loss = 4.5155e-01, PNorm = 49.0006, GNorm = 1.8483, lr_0 = 7.4625e-04
Loss = 4.7152e-01, PNorm = 49.0155, GNorm = 1.2717, lr_0 = 7.4574e-04
Loss = 4.7729e-01, PNorm = 49.0364, GNorm = 2.0879, lr_0 = 7.4523e-04
Loss = 4.5079e-01, PNorm = 49.0537, GNorm = 3.2983, lr_0 = 7.4472e-04
Loss = 4.2955e-01, PNorm = 49.0750, GNorm = 0.9740, lr_0 = 7.4421e-04
Loss = 4.5132e-01, PNorm = 49.0870, GNorm = 1.1661, lr_0 = 7.4370e-04
Loss = 5.7284e-01, PNorm = 49.1000, GNorm = 1.0231, lr_0 = 7.4319e-04
Loss = 4.9005e-01, PNorm = 49.1162, GNorm = 1.8264, lr_0 = 7.4268e-04
Loss = 4.0986e-01, PNorm = 49.1352, GNorm = 1.8025, lr_0 = 7.4217e-04
Loss = 4.6481e-01, PNorm = 49.1469, GNorm = 1.2398, lr_0 = 7.4167e-04
Loss = 5.0247e-01, PNorm = 49.1530, GNorm = 2.0583, lr_0 = 7.4116e-04
Loss = 5.9929e-01, PNorm = 49.1702, GNorm = 2.3717, lr_0 = 7.4065e-04
Loss = 4.7505e-01, PNorm = 49.1894, GNorm = 2.0846, lr_0 = 7.4014e-04
Loss = 5.1009e-01, PNorm = 49.2051, GNorm = 1.0957, lr_0 = 7.3964e-04
Loss = 4.7606e-01, PNorm = 49.2268, GNorm = 1.5622, lr_0 = 7.3913e-04
Loss = 4.4501e-01, PNorm = 49.2450, GNorm = 1.0700, lr_0 = 7.3862e-04
Loss = 4.4298e-01, PNorm = 49.2601, GNorm = 0.8749, lr_0 = 7.3812e-04
Loss = 4.0999e-01, PNorm = 49.2759, GNorm = 1.2919, lr_0 = 7.3761e-04
Loss = 4.4049e-01, PNorm = 49.2935, GNorm = 1.8635, lr_0 = 7.3711e-04
Loss = 4.5802e-01, PNorm = 49.3025, GNorm = 1.9835, lr_0 = 7.3660e-04
Loss = 5.5446e-01, PNorm = 49.3082, GNorm = 1.5603, lr_0 = 7.3610e-04
Loss = 4.5105e-01, PNorm = 49.3284, GNorm = 1.5049, lr_0 = 7.3559e-04
Loss = 5.0133e-01, PNorm = 49.3491, GNorm = 1.7044, lr_0 = 7.3509e-04
Loss = 4.5783e-01, PNorm = 49.3726, GNorm = 1.4850, lr_0 = 7.3458e-04
Loss = 4.3722e-01, PNorm = 49.3875, GNorm = 1.0712, lr_0 = 7.3408e-04
Loss = 4.9496e-01, PNorm = 49.4053, GNorm = 3.4163, lr_0 = 7.3358e-04
Loss = 5.1014e-01, PNorm = 49.4249, GNorm = 1.5131, lr_0 = 7.3308e-04
Loss = 4.7692e-01, PNorm = 49.4425, GNorm = 1.4412, lr_0 = 7.3257e-04
Loss = 4.9424e-01, PNorm = 49.4584, GNorm = 1.4427, lr_0 = 7.3207e-04
Loss = 4.5795e-01, PNorm = 49.4724, GNorm = 1.7927, lr_0 = 7.3157e-04
Loss = 4.8691e-01, PNorm = 49.4871, GNorm = 1.5535, lr_0 = 7.3107e-04
Loss = 4.5626e-01, PNorm = 49.5013, GNorm = 1.6908, lr_0 = 7.3057e-04
Loss = 4.9132e-01, PNorm = 49.5168, GNorm = 1.1440, lr_0 = 7.3007e-04
Loss = 4.4435e-01, PNorm = 49.5281, GNorm = 2.2489, lr_0 = 7.2957e-04
Loss = 4.5017e-01, PNorm = 49.5459, GNorm = 2.1174, lr_0 = 7.2907e-04
Loss = 5.0784e-01, PNorm = 49.5624, GNorm = 1.2112, lr_0 = 7.2857e-04
Loss = 4.7792e-01, PNorm = 49.5766, GNorm = 1.9766, lr_0 = 7.2807e-04
Loss = 4.5915e-01, PNorm = 49.5960, GNorm = 1.3977, lr_0 = 7.2757e-04
Loss = 4.7936e-01, PNorm = 49.6114, GNorm = 1.0270, lr_0 = 7.2707e-04
Loss = 4.5631e-01, PNorm = 49.6269, GNorm = 2.9480, lr_0 = 7.2657e-04
Loss = 4.7427e-01, PNorm = 49.6455, GNorm = 2.0205, lr_0 = 7.2608e-04
Loss = 4.2195e-01, PNorm = 49.6614, GNorm = 1.3560, lr_0 = 7.2558e-04
Loss = 4.6575e-01, PNorm = 49.6750, GNorm = 2.2616, lr_0 = 7.2508e-04
Loss = 5.6780e-01, PNorm = 49.6959, GNorm = 1.9027, lr_0 = 7.2458e-04
Loss = 4.8315e-01, PNorm = 49.7199, GNorm = 1.5142, lr_0 = 7.2409e-04
Loss = 4.0744e-01, PNorm = 49.7296, GNorm = 1.1475, lr_0 = 7.2359e-04
Loss = 4.4108e-01, PNorm = 49.7415, GNorm = 1.0131, lr_0 = 7.2310e-04
Loss = 4.7500e-01, PNorm = 49.7546, GNorm = 1.6056, lr_0 = 7.2260e-04
Loss = 4.3859e-01, PNorm = 49.7643, GNorm = 1.1525, lr_0 = 7.2211e-04
Loss = 4.5820e-01, PNorm = 49.7810, GNorm = 1.3711, lr_0 = 7.2161e-04
Loss = 4.6517e-01, PNorm = 49.8025, GNorm = 1.6066, lr_0 = 7.2112e-04
Loss = 4.4824e-01, PNorm = 49.8254, GNorm = 1.2115, lr_0 = 7.2062e-04
Loss = 4.7648e-01, PNorm = 49.8488, GNorm = 0.8790, lr_0 = 7.2013e-04
Loss = 4.4631e-01, PNorm = 49.8573, GNorm = 1.0842, lr_0 = 7.1964e-04
Validation mae = 0.118939
Epoch 6
Loss = 4.9166e-01, PNorm = 49.8710, GNorm = 2.0159, lr_0 = 7.1914e-04
Loss = 5.0111e-01, PNorm = 49.8924, GNorm = 1.2288, lr_0 = 7.1865e-04
Loss = 4.3082e-01, PNorm = 49.9089, GNorm = 1.4345, lr_0 = 7.1816e-04
Loss = 4.6018e-01, PNorm = 49.9297, GNorm = 0.9963, lr_0 = 7.1767e-04
Loss = 5.7855e-01, PNorm = 49.9523, GNorm = 0.8356, lr_0 = 7.1717e-04
Loss = 4.4634e-01, PNorm = 49.9719, GNorm = 1.8023, lr_0 = 7.1668e-04
Loss = 4.8627e-01, PNorm = 49.9866, GNorm = 1.7982, lr_0 = 7.1619e-04
Loss = 5.1140e-01, PNorm = 50.0021, GNorm = 1.2656, lr_0 = 7.1570e-04
Loss = 4.7555e-01, PNorm = 50.0228, GNorm = 1.3196, lr_0 = 7.1521e-04
Loss = 4.5727e-01, PNorm = 50.0385, GNorm = 1.5474, lr_0 = 7.1472e-04
Loss = 5.1444e-01, PNorm = 50.0571, GNorm = 1.1467, lr_0 = 7.1423e-04
Loss = 4.8862e-01, PNorm = 50.0749, GNorm = 1.5817, lr_0 = 7.1374e-04
Loss = 4.7334e-01, PNorm = 50.0941, GNorm = 1.4716, lr_0 = 7.1325e-04
Loss = 5.0040e-01, PNorm = 50.1135, GNorm = 2.0117, lr_0 = 7.1277e-04
Loss = 4.1912e-01, PNorm = 50.1319, GNorm = 1.0813, lr_0 = 7.1228e-04
Loss = 4.9129e-01, PNorm = 50.1465, GNorm = 1.4388, lr_0 = 7.1179e-04
Loss = 4.5231e-01, PNorm = 50.1713, GNorm = 2.2629, lr_0 = 7.1130e-04
Loss = 4.0490e-01, PNorm = 50.1853, GNorm = 1.2361, lr_0 = 7.1081e-04
Loss = 4.4379e-01, PNorm = 50.1986, GNorm = 1.0768, lr_0 = 7.1033e-04
Loss = 4.3380e-01, PNorm = 50.2201, GNorm = 1.3127, lr_0 = 7.0984e-04
Loss = 4.7537e-01, PNorm = 50.2371, GNorm = 1.4459, lr_0 = 7.0935e-04
Loss = 4.5337e-01, PNorm = 50.2595, GNorm = 2.6242, lr_0 = 7.0887e-04
Loss = 4.9258e-01, PNorm = 50.2703, GNorm = 1.5708, lr_0 = 7.0838e-04
Loss = 4.7553e-01, PNorm = 50.2872, GNorm = 1.2384, lr_0 = 7.0790e-04
Loss = 4.5254e-01, PNorm = 50.2980, GNorm = 1.1075, lr_0 = 7.0741e-04
Loss = 4.3390e-01, PNorm = 50.3068, GNorm = 1.6300, lr_0 = 7.0693e-04
Loss = 4.3628e-01, PNorm = 50.3199, GNorm = 0.9554, lr_0 = 7.0644e-04
Loss = 4.1753e-01, PNorm = 50.3399, GNorm = 1.4922, lr_0 = 7.0596e-04
Loss = 4.3543e-01, PNorm = 50.3580, GNorm = 1.3504, lr_0 = 7.0548e-04
Loss = 3.8587e-01, PNorm = 50.3703, GNorm = 1.8968, lr_0 = 7.0499e-04
Loss = 4.9952e-01, PNorm = 50.3842, GNorm = 1.5879, lr_0 = 7.0451e-04
Loss = 4.5441e-01, PNorm = 50.3981, GNorm = 1.2225, lr_0 = 7.0403e-04
Loss = 4.1055e-01, PNorm = 50.4161, GNorm = 1.2082, lr_0 = 7.0354e-04
Loss = 4.4968e-01, PNorm = 50.4283, GNorm = 1.2915, lr_0 = 7.0306e-04
Loss = 4.6176e-01, PNorm = 50.4407, GNorm = 1.5116, lr_0 = 7.0258e-04
Loss = 4.4474e-01, PNorm = 50.4589, GNorm = 0.9320, lr_0 = 7.0210e-04
Loss = 4.6084e-01, PNorm = 50.4722, GNorm = 1.6897, lr_0 = 7.0162e-04
Loss = 4.3037e-01, PNorm = 50.4881, GNorm = 1.4529, lr_0 = 7.0114e-04
Loss = 4.6988e-01, PNorm = 50.5036, GNorm = 2.5004, lr_0 = 7.0066e-04
Loss = 4.5907e-01, PNorm = 50.5128, GNorm = 1.7921, lr_0 = 7.0018e-04
Loss = 4.4282e-01, PNorm = 50.5322, GNorm = 1.1935, lr_0 = 6.9970e-04
Loss = 3.9041e-01, PNorm = 50.5439, GNorm = 1.0626, lr_0 = 6.9922e-04
Loss = 4.3405e-01, PNorm = 50.5584, GNorm = 0.8720, lr_0 = 6.9874e-04
Loss = 5.2281e-01, PNorm = 50.5787, GNorm = 1.4757, lr_0 = 6.9826e-04
Loss = 4.4399e-01, PNorm = 50.5904, GNorm = 1.1355, lr_0 = 6.9778e-04
Loss = 4.2146e-01, PNorm = 50.6013, GNorm = 1.4179, lr_0 = 6.9730e-04
Loss = 4.5984e-01, PNorm = 50.6163, GNorm = 1.0553, lr_0 = 6.9683e-04
Loss = 4.5759e-01, PNorm = 50.6336, GNorm = 2.1759, lr_0 = 6.9635e-04
Loss = 4.8864e-01, PNorm = 50.6437, GNorm = 1.5150, lr_0 = 6.9587e-04
Loss = 4.2187e-01, PNorm = 50.6585, GNorm = 1.6725, lr_0 = 6.9540e-04
Loss = 4.0085e-01, PNorm = 50.6729, GNorm = 1.6824, lr_0 = 6.9492e-04
Loss = 4.3525e-01, PNorm = 50.6868, GNorm = 1.2200, lr_0 = 6.9444e-04
Loss = 3.9189e-01, PNorm = 50.7053, GNorm = 1.1926, lr_0 = 6.9397e-04
Loss = 4.5885e-01, PNorm = 50.7286, GNorm = 0.9053, lr_0 = 6.9349e-04
Loss = 4.4409e-01, PNorm = 50.7330, GNorm = 1.9889, lr_0 = 6.9302e-04
Loss = 4.3504e-01, PNorm = 50.7529, GNorm = 1.6654, lr_0 = 6.9254e-04
Loss = 4.7632e-01, PNorm = 50.7706, GNorm = 1.2439, lr_0 = 6.9207e-04
Loss = 5.8415e-01, PNorm = 50.7855, GNorm = 1.7832, lr_0 = 6.9159e-04
Loss = 4.5866e-01, PNorm = 50.7928, GNorm = 1.4285, lr_0 = 6.9112e-04
Loss = 4.5640e-01, PNorm = 50.8140, GNorm = 1.3223, lr_0 = 6.9065e-04
Loss = 4.5764e-01, PNorm = 50.8317, GNorm = 1.7766, lr_0 = 6.9017e-04
Loss = 4.9880e-01, PNorm = 50.8509, GNorm = 1.3091, lr_0 = 6.8970e-04
Loss = 4.5918e-01, PNorm = 50.8728, GNorm = 1.8577, lr_0 = 6.8923e-04
Loss = 4.2108e-01, PNorm = 50.8883, GNorm = 1.0603, lr_0 = 6.8876e-04
Loss = 4.1426e-01, PNorm = 50.9020, GNorm = 1.4332, lr_0 = 6.8828e-04
Loss = 5.2285e-01, PNorm = 50.9175, GNorm = 2.1232, lr_0 = 6.8781e-04
Loss = 4.8283e-01, PNorm = 50.9359, GNorm = 1.2494, lr_0 = 6.8734e-04
Loss = 4.6242e-01, PNorm = 50.9578, GNorm = 1.8194, lr_0 = 6.8687e-04
Loss = 4.6910e-01, PNorm = 50.9761, GNorm = 2.0295, lr_0 = 6.8640e-04
Loss = 4.8160e-01, PNorm = 50.9885, GNorm = 1.7872, lr_0 = 6.8593e-04
Loss = 4.8089e-01, PNorm = 50.9973, GNorm = 0.9353, lr_0 = 6.8546e-04
Loss = 4.5898e-01, PNorm = 51.0090, GNorm = 1.6472, lr_0 = 6.8499e-04
Loss = 4.8134e-01, PNorm = 51.0157, GNorm = 1.0904, lr_0 = 6.8452e-04
Loss = 4.5617e-01, PNorm = 51.0221, GNorm = 1.2066, lr_0 = 6.8405e-04
Loss = 4.3589e-01, PNorm = 51.0420, GNorm = 1.3344, lr_0 = 6.8358e-04
Loss = 4.8642e-01, PNorm = 51.0616, GNorm = 0.9654, lr_0 = 6.8312e-04
Loss = 4.8339e-01, PNorm = 51.0743, GNorm = 1.3505, lr_0 = 6.8265e-04
Loss = 4.8208e-01, PNorm = 51.0945, GNorm = 1.1341, lr_0 = 6.8218e-04
Loss = 4.6992e-01, PNorm = 51.1133, GNorm = 1.3863, lr_0 = 6.8171e-04
Loss = 4.6977e-01, PNorm = 51.1248, GNorm = 1.3046, lr_0 = 6.8125e-04
Loss = 4.7300e-01, PNorm = 51.1410, GNorm = 0.9891, lr_0 = 6.8078e-04
Loss = 4.6995e-01, PNorm = 51.1466, GNorm = 1.1591, lr_0 = 6.8031e-04
Loss = 4.1868e-01, PNorm = 51.1595, GNorm = 1.2378, lr_0 = 6.7985e-04
Loss = 4.5620e-01, PNorm = 51.1746, GNorm = 1.2619, lr_0 = 6.7938e-04
Loss = 4.3278e-01, PNorm = 51.1937, GNorm = 1.5484, lr_0 = 6.7892e-04
Loss = 4.7107e-01, PNorm = 51.2082, GNorm = 1.5296, lr_0 = 6.7845e-04
Loss = 4.2151e-01, PNorm = 51.2275, GNorm = 1.0736, lr_0 = 6.7799e-04
Loss = 4.4679e-01, PNorm = 51.2456, GNorm = 1.0118, lr_0 = 6.7752e-04
Loss = 5.2122e-01, PNorm = 51.2597, GNorm = 1.4842, lr_0 = 6.7706e-04
Loss = 4.5341e-01, PNorm = 51.2812, GNorm = 1.9934, lr_0 = 6.7659e-04
Loss = 5.2721e-01, PNorm = 51.2966, GNorm = 1.6035, lr_0 = 6.7613e-04
Loss = 3.9685e-01, PNorm = 51.3242, GNorm = 1.7849, lr_0 = 6.7567e-04
Loss = 4.2059e-01, PNorm = 51.3400, GNorm = 1.3646, lr_0 = 6.7520e-04
Loss = 4.9672e-01, PNorm = 51.3456, GNorm = 1.3661, lr_0 = 6.7474e-04
Loss = 3.6634e-01, PNorm = 51.3517, GNorm = 1.0609, lr_0 = 6.7428e-04
Loss = 4.9884e-01, PNorm = 51.3664, GNorm = 1.5342, lr_0 = 6.7382e-04
Loss = 4.1548e-01, PNorm = 51.3885, GNorm = 1.5807, lr_0 = 6.7335e-04
Loss = 4.6109e-01, PNorm = 51.4024, GNorm = 1.4635, lr_0 = 6.7289e-04
Loss = 4.6259e-01, PNorm = 51.4156, GNorm = 1.8491, lr_0 = 6.7243e-04
Loss = 4.6653e-01, PNorm = 51.4375, GNorm = 0.8936, lr_0 = 6.7197e-04
Loss = 4.1250e-01, PNorm = 51.4595, GNorm = 0.8107, lr_0 = 6.7151e-04
Loss = 4.2310e-01, PNorm = 51.4717, GNorm = 0.8875, lr_0 = 6.7105e-04
Loss = 4.3854e-01, PNorm = 51.4838, GNorm = 1.1759, lr_0 = 6.7059e-04
Loss = 4.9102e-01, PNorm = 51.4957, GNorm = 1.3166, lr_0 = 6.7013e-04
Loss = 5.1838e-01, PNorm = 51.5085, GNorm = 1.5535, lr_0 = 6.6967e-04
Loss = 4.7094e-01, PNorm = 51.5267, GNorm = 1.0386, lr_0 = 6.6921e-04
Loss = 4.7270e-01, PNorm = 51.5425, GNorm = 1.0372, lr_0 = 6.6876e-04
Loss = 5.0918e-01, PNorm = 51.5523, GNorm = 1.1190, lr_0 = 6.6830e-04
Loss = 4.6504e-01, PNorm = 51.5598, GNorm = 0.8901, lr_0 = 6.6784e-04
Loss = 4.0377e-01, PNorm = 51.5756, GNorm = 1.3160, lr_0 = 6.6738e-04
Loss = 4.1482e-01, PNorm = 51.5949, GNorm = 1.2245, lr_0 = 6.6693e-04
Loss = 5.2633e-01, PNorm = 51.6192, GNorm = 1.0369, lr_0 = 6.6647e-04
Loss = 4.1700e-01, PNorm = 51.6370, GNorm = 1.2510, lr_0 = 6.6601e-04
Loss = 4.4993e-01, PNorm = 51.6463, GNorm = 1.1335, lr_0 = 6.6556e-04
Loss = 4.6103e-01, PNorm = 51.6574, GNorm = 2.1896, lr_0 = 6.6510e-04
Loss = 4.9686e-01, PNorm = 51.6724, GNorm = 1.9462, lr_0 = 6.6464e-04
Loss = 4.7343e-01, PNorm = 51.6843, GNorm = 1.2281, lr_0 = 6.6419e-04
Loss = 4.8800e-01, PNorm = 51.6981, GNorm = 1.9033, lr_0 = 6.6373e-04
Loss = 4.1026e-01, PNorm = 51.7054, GNorm = 0.8807, lr_0 = 6.6328e-04
Loss = 4.5046e-01, PNorm = 51.7250, GNorm = 1.2904, lr_0 = 6.6282e-04
Validation mae = 0.119335
Epoch 7
Loss = 5.0660e-01, PNorm = 51.7340, GNorm = 1.3752, lr_0 = 6.6237e-04
Loss = 4.7693e-01, PNorm = 51.7527, GNorm = 1.4921, lr_0 = 6.6192e-04
Loss = 4.9703e-01, PNorm = 51.7706, GNorm = 1.8697, lr_0 = 6.6146e-04
Loss = 4.5208e-01, PNorm = 51.7815, GNorm = 1.3431, lr_0 = 6.6101e-04
Loss = 4.5942e-01, PNorm = 51.8015, GNorm = 0.8110, lr_0 = 6.6056e-04
Loss = 4.4275e-01, PNorm = 51.8176, GNorm = 1.3693, lr_0 = 6.6011e-04
Loss = 4.5702e-01, PNorm = 51.8319, GNorm = 1.9180, lr_0 = 6.5965e-04
Loss = 4.2396e-01, PNorm = 51.8447, GNorm = 1.9008, lr_0 = 6.5920e-04
Loss = 4.4877e-01, PNorm = 51.8642, GNorm = 1.0995, lr_0 = 6.5875e-04
Loss = 4.1103e-01, PNorm = 51.8814, GNorm = 1.8117, lr_0 = 6.5830e-04
Loss = 4.6050e-01, PNorm = 51.8953, GNorm = 1.3735, lr_0 = 6.5785e-04
Loss = 4.8167e-01, PNorm = 51.9075, GNorm = 2.0842, lr_0 = 6.5740e-04
Loss = 4.6218e-01, PNorm = 51.9252, GNorm = 1.1922, lr_0 = 6.5695e-04
Loss = 4.0756e-01, PNorm = 51.9448, GNorm = 1.6583, lr_0 = 6.5650e-04
Loss = 4.2852e-01, PNorm = 51.9614, GNorm = 1.3052, lr_0 = 6.5605e-04
Loss = 4.6696e-01, PNorm = 51.9679, GNorm = 1.0888, lr_0 = 6.5560e-04
Loss = 4.3718e-01, PNorm = 51.9766, GNorm = 1.3936, lr_0 = 6.5515e-04
Loss = 4.0777e-01, PNorm = 51.9927, GNorm = 0.9613, lr_0 = 6.5470e-04
Loss = 3.7692e-01, PNorm = 52.0068, GNorm = 0.8572, lr_0 = 6.5425e-04
Loss = 4.3333e-01, PNorm = 52.0228, GNorm = 1.9608, lr_0 = 6.5380e-04
Loss = 4.7529e-01, PNorm = 52.0364, GNorm = 1.2412, lr_0 = 6.5335e-04
Loss = 4.2211e-01, PNorm = 52.0577, GNorm = 1.5017, lr_0 = 6.5291e-04
Loss = 4.5404e-01, PNorm = 52.0750, GNorm = 1.3910, lr_0 = 6.5246e-04
Loss = 4.4093e-01, PNorm = 52.0890, GNorm = 1.2858, lr_0 = 6.5201e-04
Loss = 4.4249e-01, PNorm = 52.1060, GNorm = 1.7448, lr_0 = 6.5157e-04
Loss = 4.7845e-01, PNorm = 52.1157, GNorm = 1.3154, lr_0 = 6.5112e-04
Loss = 3.9804e-01, PNorm = 52.1272, GNorm = 1.3891, lr_0 = 6.5067e-04
Loss = 4.3866e-01, PNorm = 52.1446, GNorm = 1.4396, lr_0 = 6.5023e-04
Loss = 4.3453e-01, PNorm = 52.1596, GNorm = 1.3254, lr_0 = 6.4978e-04
Loss = 4.4810e-01, PNorm = 52.1762, GNorm = 1.1202, lr_0 = 6.4934e-04
Loss = 4.3683e-01, PNorm = 52.1859, GNorm = 1.2440, lr_0 = 6.4889e-04
Loss = 4.3021e-01, PNorm = 52.1932, GNorm = 1.3522, lr_0 = 6.4845e-04
Loss = 4.4174e-01, PNorm = 52.2125, GNorm = 1.1040, lr_0 = 6.4800e-04
Loss = 4.1416e-01, PNorm = 52.2207, GNorm = 1.3682, lr_0 = 6.4756e-04
Loss = 4.3837e-01, PNorm = 52.2279, GNorm = 1.2822, lr_0 = 6.4712e-04
Loss = 4.6516e-01, PNorm = 52.2518, GNorm = 1.3129, lr_0 = 6.4667e-04
Loss = 4.5896e-01, PNorm = 52.2668, GNorm = 2.1757, lr_0 = 6.4623e-04
Loss = 4.2654e-01, PNorm = 52.2781, GNorm = 1.1064, lr_0 = 6.4579e-04
Loss = 4.7428e-01, PNorm = 52.2924, GNorm = 1.4521, lr_0 = 6.4534e-04
Loss = 4.1846e-01, PNorm = 52.3072, GNorm = 1.0383, lr_0 = 6.4490e-04
Loss = 4.4699e-01, PNorm = 52.3260, GNorm = 1.4134, lr_0 = 6.4446e-04
Loss = 4.6321e-01, PNorm = 52.3445, GNorm = 1.9353, lr_0 = 6.4402e-04
Loss = 4.0896e-01, PNorm = 52.3595, GNorm = 2.0257, lr_0 = 6.4358e-04
Loss = 4.5007e-01, PNorm = 52.3739, GNorm = 3.1381, lr_0 = 6.4314e-04
Loss = 4.5330e-01, PNorm = 52.3984, GNorm = 1.4452, lr_0 = 6.4270e-04
Loss = 4.5863e-01, PNorm = 52.4211, GNorm = 2.1191, lr_0 = 6.4226e-04
Loss = 4.5224e-01, PNorm = 52.4347, GNorm = 0.9970, lr_0 = 6.4182e-04
Loss = 4.4177e-01, PNorm = 52.4503, GNorm = 1.6186, lr_0 = 6.4138e-04
Loss = 4.1021e-01, PNorm = 52.4580, GNorm = 1.1731, lr_0 = 6.4094e-04
Loss = 4.4045e-01, PNorm = 52.4738, GNorm = 1.5620, lr_0 = 6.4050e-04
Loss = 4.2113e-01, PNorm = 52.4887, GNorm = 1.4197, lr_0 = 6.4006e-04
Loss = 4.2506e-01, PNorm = 52.5095, GNorm = 1.2473, lr_0 = 6.3962e-04
Loss = 4.4099e-01, PNorm = 52.5203, GNorm = 1.0464, lr_0 = 6.3918e-04
Loss = 5.1607e-01, PNorm = 52.5320, GNorm = 1.2961, lr_0 = 6.3874e-04
Loss = 4.6119e-01, PNorm = 52.5488, GNorm = 1.5661, lr_0 = 6.3831e-04
Loss = 4.2595e-01, PNorm = 52.5637, GNorm = 1.2521, lr_0 = 6.3787e-04
Loss = 4.5010e-01, PNorm = 52.5779, GNorm = 0.9935, lr_0 = 6.3743e-04
Loss = 5.3114e-01, PNorm = 52.6043, GNorm = 3.3907, lr_0 = 6.3700e-04
Loss = 4.5911e-01, PNorm = 52.6179, GNorm = 1.1999, lr_0 = 6.3656e-04
Loss = 4.7712e-01, PNorm = 52.6195, GNorm = 1.2609, lr_0 = 6.3612e-04
Loss = 3.7341e-01, PNorm = 52.6343, GNorm = 1.2716, lr_0 = 6.3569e-04
Loss = 4.0201e-01, PNorm = 52.6492, GNorm = 0.9141, lr_0 = 6.3525e-04
Loss = 4.2859e-01, PNorm = 52.6597, GNorm = 1.0588, lr_0 = 6.3482e-04
Loss = 4.5748e-01, PNorm = 52.6706, GNorm = 1.0383, lr_0 = 6.3438e-04
Loss = 4.3931e-01, PNorm = 52.6836, GNorm = 1.0010, lr_0 = 6.3395e-04
Loss = 4.2927e-01, PNorm = 52.6931, GNorm = 1.0886, lr_0 = 6.3351e-04
Loss = 4.0550e-01, PNorm = 52.7099, GNorm = 1.2829, lr_0 = 6.3308e-04
Loss = 4.7534e-01, PNorm = 52.7264, GNorm = 0.9147, lr_0 = 6.3265e-04
Loss = 4.5946e-01, PNorm = 52.7483, GNorm = 1.3438, lr_0 = 6.3221e-04
Loss = 4.0985e-01, PNorm = 52.7671, GNorm = 2.1190, lr_0 = 6.3178e-04
Loss = 4.7928e-01, PNorm = 52.7822, GNorm = 1.0739, lr_0 = 6.3135e-04
Loss = 4.9132e-01, PNorm = 52.7985, GNorm = 2.4069, lr_0 = 6.3091e-04
Loss = 5.4389e-01, PNorm = 52.8080, GNorm = 1.1655, lr_0 = 6.3048e-04
Loss = 4.8006e-01, PNorm = 52.8256, GNorm = 0.8706, lr_0 = 6.3005e-04
Loss = 4.4614e-01, PNorm = 52.8409, GNorm = 1.8238, lr_0 = 6.2962e-04
Loss = 4.3669e-01, PNorm = 52.8529, GNorm = 1.1553, lr_0 = 6.2919e-04
Loss = 4.4901e-01, PNorm = 52.8723, GNorm = 1.2041, lr_0 = 6.2876e-04
Loss = 5.5446e-01, PNorm = 52.8825, GNorm = 2.2277, lr_0 = 6.2833e-04
Loss = 4.7195e-01, PNorm = 52.8926, GNorm = 1.6484, lr_0 = 6.2789e-04
Loss = 4.8539e-01, PNorm = 52.9065, GNorm = 1.1179, lr_0 = 6.2746e-04
Loss = 4.0723e-01, PNorm = 52.9188, GNorm = 1.7044, lr_0 = 6.2703e-04
Loss = 4.8885e-01, PNorm = 52.9350, GNorm = 1.2618, lr_0 = 6.2661e-04
Loss = 4.1156e-01, PNorm = 52.9533, GNorm = 1.2007, lr_0 = 6.2618e-04
Loss = 4.4180e-01, PNorm = 52.9650, GNorm = 1.3907, lr_0 = 6.2575e-04
Loss = 4.2776e-01, PNorm = 52.9769, GNorm = 0.7900, lr_0 = 6.2532e-04
Loss = 4.6701e-01, PNorm = 52.9942, GNorm = 1.1653, lr_0 = 6.2489e-04
Loss = 3.9997e-01, PNorm = 53.0107, GNorm = 1.7423, lr_0 = 6.2446e-04
Loss = 4.6175e-01, PNorm = 53.0209, GNorm = 1.7707, lr_0 = 6.2403e-04
Loss = 4.2050e-01, PNorm = 53.0416, GNorm = 1.3062, lr_0 = 6.2361e-04
Loss = 4.5724e-01, PNorm = 53.0498, GNorm = 1.3015, lr_0 = 6.2318e-04
Loss = 4.5774e-01, PNorm = 53.0557, GNorm = 1.2807, lr_0 = 6.2275e-04
Loss = 4.3704e-01, PNorm = 53.0670, GNorm = 1.2144, lr_0 = 6.2233e-04
Loss = 5.1841e-01, PNorm = 53.0786, GNorm = 1.9691, lr_0 = 6.2190e-04
Loss = 3.8913e-01, PNorm = 53.0967, GNorm = 1.4781, lr_0 = 6.2147e-04
Loss = 4.1523e-01, PNorm = 53.1086, GNorm = 1.1064, lr_0 = 6.2105e-04
Loss = 4.4145e-01, PNorm = 53.1230, GNorm = 1.2783, lr_0 = 6.2062e-04
Loss = 4.5858e-01, PNorm = 53.1298, GNorm = 1.3129, lr_0 = 6.2020e-04
Loss = 4.0392e-01, PNorm = 53.1331, GNorm = 1.8688, lr_0 = 6.1977e-04
Loss = 4.2220e-01, PNorm = 53.1380, GNorm = 1.2092, lr_0 = 6.1935e-04
Loss = 4.6012e-01, PNorm = 53.1459, GNorm = 1.4759, lr_0 = 6.1892e-04
Loss = 5.1708e-01, PNorm = 53.1651, GNorm = 1.2820, lr_0 = 6.1850e-04
Loss = 4.1775e-01, PNorm = 53.1774, GNorm = 1.7762, lr_0 = 6.1808e-04
Loss = 4.0030e-01, PNorm = 53.1910, GNorm = 0.9151, lr_0 = 6.1765e-04
Loss = 4.1912e-01, PNorm = 53.2083, GNorm = 1.3441, lr_0 = 6.1723e-04
Loss = 4.6671e-01, PNorm = 53.2227, GNorm = 1.6994, lr_0 = 6.1681e-04
Loss = 4.3484e-01, PNorm = 53.2338, GNorm = 1.4496, lr_0 = 6.1638e-04
Loss = 3.7895e-01, PNorm = 53.2457, GNorm = 0.9486, lr_0 = 6.1596e-04
Loss = 3.9061e-01, PNorm = 53.2586, GNorm = 1.7457, lr_0 = 6.1554e-04
Loss = 4.3037e-01, PNorm = 53.2718, GNorm = 1.3224, lr_0 = 6.1512e-04
Loss = 4.1854e-01, PNorm = 53.2837, GNorm = 1.3564, lr_0 = 6.1470e-04
Loss = 4.9680e-01, PNorm = 53.2984, GNorm = 1.5072, lr_0 = 6.1428e-04
Loss = 4.8279e-01, PNorm = 53.3156, GNorm = 1.5362, lr_0 = 6.1385e-04
Loss = 3.4964e-01, PNorm = 53.3258, GNorm = 1.5235, lr_0 = 6.1343e-04
Loss = 5.0063e-01, PNorm = 53.3347, GNorm = 1.5916, lr_0 = 6.1301e-04
Loss = 4.4557e-01, PNorm = 53.3423, GNorm = 1.1686, lr_0 = 6.1259e-04
Loss = 4.1319e-01, PNorm = 53.3590, GNorm = 1.7019, lr_0 = 6.1217e-04
Loss = 4.8401e-01, PNorm = 53.3702, GNorm = 1.3463, lr_0 = 6.1175e-04
Loss = 4.0899e-01, PNorm = 53.3857, GNorm = 1.5846, lr_0 = 6.1134e-04
Loss = 4.3923e-01, PNorm = 53.4024, GNorm = 1.5762, lr_0 = 6.1092e-04
Loss = 4.2747e-01, PNorm = 53.4201, GNorm = 0.9879, lr_0 = 6.1050e-04
Validation mae = 0.117300
Epoch 8
Loss = 3.9847e-01, PNorm = 53.4304, GNorm = 2.1877, lr_0 = 6.1008e-04
Loss = 4.5252e-01, PNorm = 53.4401, GNorm = 1.1856, lr_0 = 6.0966e-04
Loss = 3.9557e-01, PNorm = 53.4531, GNorm = 1.3347, lr_0 = 6.0924e-04
Loss = 4.2620e-01, PNorm = 53.4636, GNorm = 1.8964, lr_0 = 6.0883e-04
Loss = 4.7476e-01, PNorm = 53.4749, GNorm = 2.6747, lr_0 = 6.0841e-04
Loss = 4.7765e-01, PNorm = 53.4897, GNorm = 1.6174, lr_0 = 6.0799e-04
Loss = 5.3467e-01, PNorm = 53.5065, GNorm = 1.5057, lr_0 = 6.0758e-04
Loss = 4.1320e-01, PNorm = 53.5222, GNorm = 1.5831, lr_0 = 6.0716e-04
Loss = 4.6747e-01, PNorm = 53.5431, GNorm = 1.5581, lr_0 = 6.0674e-04
Loss = 4.1279e-01, PNorm = 53.5582, GNorm = 2.0124, lr_0 = 6.0633e-04
Loss = 3.9362e-01, PNorm = 53.5753, GNorm = 1.7390, lr_0 = 6.0591e-04
Loss = 4.6943e-01, PNorm = 53.5934, GNorm = 1.8304, lr_0 = 6.0550e-04
Loss = 3.9222e-01, PNorm = 53.6023, GNorm = 1.0907, lr_0 = 6.0508e-04
Loss = 3.8458e-01, PNorm = 53.6097, GNorm = 1.0860, lr_0 = 6.0467e-04
Loss = 3.6314e-01, PNorm = 53.6220, GNorm = 0.7223, lr_0 = 6.0425e-04
Loss = 4.4651e-01, PNorm = 53.6347, GNorm = 1.9055, lr_0 = 6.0384e-04
Loss = 4.3156e-01, PNorm = 53.6410, GNorm = 1.8008, lr_0 = 6.0343e-04
Loss = 3.8719e-01, PNorm = 53.6556, GNorm = 1.2712, lr_0 = 6.0301e-04
Loss = 4.7088e-01, PNorm = 53.6709, GNorm = 2.7154, lr_0 = 6.0260e-04
Loss = 4.3911e-01, PNorm = 53.6782, GNorm = 1.9035, lr_0 = 6.0219e-04
Loss = 4.5712e-01, PNorm = 53.6873, GNorm = 2.2403, lr_0 = 6.0178e-04
Loss = 4.4345e-01, PNorm = 53.7021, GNorm = 1.7641, lr_0 = 6.0136e-04
Loss = 4.6514e-01, PNorm = 53.7135, GNorm = 2.6995, lr_0 = 6.0095e-04
Loss = 4.1332e-01, PNorm = 53.7338, GNorm = 1.1631, lr_0 = 6.0054e-04
Loss = 4.4757e-01, PNorm = 53.7466, GNorm = 1.4363, lr_0 = 6.0013e-04
Loss = 4.7030e-01, PNorm = 53.7572, GNorm = 1.2821, lr_0 = 5.9972e-04
Loss = 4.1798e-01, PNorm = 53.7723, GNorm = 1.6534, lr_0 = 5.9931e-04
Loss = 4.5425e-01, PNorm = 53.7847, GNorm = 1.7267, lr_0 = 5.9890e-04
Loss = 4.4926e-01, PNorm = 53.7968, GNorm = 1.1039, lr_0 = 5.9849e-04
Loss = 4.3944e-01, PNorm = 53.8056, GNorm = 1.5988, lr_0 = 5.9808e-04
Loss = 3.8869e-01, PNorm = 53.8176, GNorm = 2.1092, lr_0 = 5.9767e-04
Loss = 4.2121e-01, PNorm = 53.8270, GNorm = 1.6614, lr_0 = 5.9726e-04
Loss = 3.7398e-01, PNorm = 53.8436, GNorm = 1.6643, lr_0 = 5.9685e-04
Loss = 4.3338e-01, PNorm = 53.8523, GNorm = 1.4378, lr_0 = 5.9644e-04
Loss = 4.2029e-01, PNorm = 53.8609, GNorm = 1.1548, lr_0 = 5.9603e-04
Loss = 4.0631e-01, PNorm = 53.8761, GNorm = 1.0606, lr_0 = 5.9562e-04
Loss = 4.7726e-01, PNorm = 53.8863, GNorm = 1.9124, lr_0 = 5.9521e-04
Loss = 4.8298e-01, PNorm = 53.8991, GNorm = 0.9843, lr_0 = 5.9481e-04
Loss = 4.0446e-01, PNorm = 53.9110, GNorm = 1.4061, lr_0 = 5.9440e-04
Loss = 4.5461e-01, PNorm = 53.9179, GNorm = 1.5315, lr_0 = 5.9399e-04
Loss = 4.3515e-01, PNorm = 53.9281, GNorm = 0.7312, lr_0 = 5.9358e-04
Loss = 4.0528e-01, PNorm = 53.9433, GNorm = 2.3639, lr_0 = 5.9318e-04
Loss = 4.5418e-01, PNorm = 53.9556, GNorm = 0.9267, lr_0 = 5.9277e-04
Loss = 5.1451e-01, PNorm = 53.9714, GNorm = 1.3747, lr_0 = 5.9236e-04
Loss = 5.0513e-01, PNorm = 53.9893, GNorm = 1.2128, lr_0 = 5.9196e-04
Loss = 4.3503e-01, PNorm = 54.0005, GNorm = 1.2761, lr_0 = 5.9155e-04
Loss = 4.0693e-01, PNorm = 54.0082, GNorm = 1.0057, lr_0 = 5.9115e-04
Loss = 4.2220e-01, PNorm = 54.0275, GNorm = 1.1792, lr_0 = 5.9074e-04
Loss = 4.4576e-01, PNorm = 54.0337, GNorm = 1.9223, lr_0 = 5.9034e-04
Loss = 4.1267e-01, PNorm = 54.0455, GNorm = 1.5574, lr_0 = 5.8993e-04
Loss = 4.5308e-01, PNorm = 54.0548, GNorm = 1.0936, lr_0 = 5.8953e-04
Loss = 4.7387e-01, PNorm = 54.0664, GNorm = 1.4934, lr_0 = 5.8913e-04
Loss = 4.3551e-01, PNorm = 54.0842, GNorm = 1.1686, lr_0 = 5.8872e-04
Loss = 4.6532e-01, PNorm = 54.0922, GNorm = 2.6698, lr_0 = 5.8832e-04
Loss = 4.0540e-01, PNorm = 54.1043, GNorm = 1.0116, lr_0 = 5.8792e-04
Loss = 3.4523e-01, PNorm = 54.1180, GNorm = 0.9538, lr_0 = 5.8751e-04
Loss = 4.7187e-01, PNorm = 54.1283, GNorm = 1.7424, lr_0 = 5.8711e-04
Loss = 4.5142e-01, PNorm = 54.1363, GNorm = 0.9048, lr_0 = 5.8671e-04
Loss = 4.7088e-01, PNorm = 54.1449, GNorm = 1.6495, lr_0 = 5.8631e-04
Loss = 4.4859e-01, PNorm = 54.1537, GNorm = 1.0317, lr_0 = 5.8591e-04
Loss = 4.7180e-01, PNorm = 54.1699, GNorm = 2.5778, lr_0 = 5.8550e-04
Loss = 4.1482e-01, PNorm = 54.1841, GNorm = 1.9169, lr_0 = 5.8510e-04
Loss = 4.4380e-01, PNorm = 54.1940, GNorm = 1.7173, lr_0 = 5.8470e-04
Loss = 4.2423e-01, PNorm = 54.2063, GNorm = 1.4215, lr_0 = 5.8430e-04
Loss = 4.5271e-01, PNorm = 54.2189, GNorm = 1.5763, lr_0 = 5.8390e-04
Loss = 4.4346e-01, PNorm = 54.2358, GNorm = 3.0279, lr_0 = 5.8350e-04
Loss = 4.4701e-01, PNorm = 54.2453, GNorm = 0.9285, lr_0 = 5.8310e-04
Loss = 4.8033e-01, PNorm = 54.2611, GNorm = 1.7631, lr_0 = 5.8270e-04
Loss = 4.3431e-01, PNorm = 54.2729, GNorm = 0.8417, lr_0 = 5.8230e-04
Loss = 4.0234e-01, PNorm = 54.2852, GNorm = 1.6468, lr_0 = 5.8190e-04
Loss = 4.2763e-01, PNorm = 54.2989, GNorm = 1.3130, lr_0 = 5.8151e-04
Loss = 4.8571e-01, PNorm = 54.3096, GNorm = 1.1159, lr_0 = 5.8111e-04
Loss = 3.7997e-01, PNorm = 54.3199, GNorm = 1.2727, lr_0 = 5.8071e-04
Loss = 4.0153e-01, PNorm = 54.3286, GNorm = 1.1057, lr_0 = 5.8031e-04
Loss = 4.0193e-01, PNorm = 54.3411, GNorm = 1.5459, lr_0 = 5.7991e-04
Loss = 4.2187e-01, PNorm = 54.3596, GNorm = 1.3658, lr_0 = 5.7952e-04
Loss = 4.5092e-01, PNorm = 54.3769, GNorm = 1.3625, lr_0 = 5.7912e-04
Loss = 4.6734e-01, PNorm = 54.3951, GNorm = 1.3440, lr_0 = 5.7872e-04
Loss = 4.0709e-01, PNorm = 54.4125, GNorm = 1.0562, lr_0 = 5.7833e-04
Loss = 4.8471e-01, PNorm = 54.4217, GNorm = 1.5512, lr_0 = 5.7793e-04
Loss = 4.1427e-01, PNorm = 54.4371, GNorm = 0.9247, lr_0 = 5.7753e-04
Loss = 4.2967e-01, PNorm = 54.4470, GNorm = 1.5212, lr_0 = 5.7714e-04
Loss = 4.5768e-01, PNorm = 54.4533, GNorm = 1.6585, lr_0 = 5.7674e-04
Loss = 4.3011e-01, PNorm = 54.4638, GNorm = 1.0423, lr_0 = 5.7635e-04
Loss = 4.2249e-01, PNorm = 54.4759, GNorm = 1.2937, lr_0 = 5.7595e-04
Loss = 3.9631e-01, PNorm = 54.4865, GNorm = 1.6246, lr_0 = 5.7556e-04
Loss = 4.0114e-01, PNorm = 54.4958, GNorm = 1.7713, lr_0 = 5.7516e-04
Loss = 4.3144e-01, PNorm = 54.5073, GNorm = 2.5292, lr_0 = 5.7477e-04
Loss = 3.7680e-01, PNorm = 54.5232, GNorm = 1.4911, lr_0 = 5.7438e-04
Loss = 4.4674e-01, PNorm = 54.5343, GNorm = 1.3026, lr_0 = 5.7398e-04
Loss = 4.3234e-01, PNorm = 54.5462, GNorm = 0.9874, lr_0 = 5.7359e-04
Loss = 4.0725e-01, PNorm = 54.5596, GNorm = 1.4157, lr_0 = 5.7320e-04
Loss = 4.4794e-01, PNorm = 54.5682, GNorm = 1.7307, lr_0 = 5.7280e-04
Loss = 4.5890e-01, PNorm = 54.5770, GNorm = 1.4133, lr_0 = 5.7241e-04
Loss = 4.2391e-01, PNorm = 54.5838, GNorm = 2.1222, lr_0 = 5.7202e-04
Loss = 4.9866e-01, PNorm = 54.5956, GNorm = 1.4492, lr_0 = 5.7163e-04
Loss = 4.2052e-01, PNorm = 54.6079, GNorm = 0.8316, lr_0 = 5.7124e-04
Loss = 4.9029e-01, PNorm = 54.6214, GNorm = 2.3910, lr_0 = 5.7084e-04
Loss = 4.5126e-01, PNorm = 54.6342, GNorm = 1.8418, lr_0 = 5.7045e-04
Loss = 4.2050e-01, PNorm = 54.6479, GNorm = 1.7984, lr_0 = 5.7006e-04
Loss = 4.5148e-01, PNorm = 54.6605, GNorm = 1.7332, lr_0 = 5.6967e-04
Loss = 4.2972e-01, PNorm = 54.6737, GNorm = 1.5483, lr_0 = 5.6928e-04
Loss = 4.3566e-01, PNorm = 54.6806, GNorm = 1.6079, lr_0 = 5.6889e-04
Loss = 4.4304e-01, PNorm = 54.6878, GNorm = 1.6670, lr_0 = 5.6850e-04
Loss = 4.0577e-01, PNorm = 54.6992, GNorm = 1.4707, lr_0 = 5.6811e-04
Loss = 4.2741e-01, PNorm = 54.7077, GNorm = 1.2841, lr_0 = 5.6772e-04
Loss = 4.1640e-01, PNorm = 54.7209, GNorm = 1.7657, lr_0 = 5.6733e-04
Loss = 4.5672e-01, PNorm = 54.7271, GNorm = 1.4718, lr_0 = 5.6695e-04
Loss = 4.1856e-01, PNorm = 54.7330, GNorm = 2.6851, lr_0 = 5.6656e-04
Loss = 4.4531e-01, PNorm = 54.7437, GNorm = 0.9061, lr_0 = 5.6617e-04
Loss = 4.2597e-01, PNorm = 54.7533, GNorm = 1.1846, lr_0 = 5.6578e-04
Loss = 4.2513e-01, PNorm = 54.7665, GNorm = 1.3909, lr_0 = 5.6539e-04
Loss = 4.2422e-01, PNorm = 54.7798, GNorm = 1.4704, lr_0 = 5.6501e-04
Loss = 3.8040e-01, PNorm = 54.7903, GNorm = 1.3632, lr_0 = 5.6462e-04
Loss = 4.1108e-01, PNorm = 54.7974, GNorm = 1.3397, lr_0 = 5.6423e-04
Loss = 4.3420e-01, PNorm = 54.8045, GNorm = 1.3089, lr_0 = 5.6385e-04
Loss = 3.7554e-01, PNorm = 54.8136, GNorm = 1.1180, lr_0 = 5.6346e-04
Loss = 4.4317e-01, PNorm = 54.8238, GNorm = 1.2078, lr_0 = 5.6307e-04
Loss = 4.6637e-01, PNorm = 54.8343, GNorm = 1.4696, lr_0 = 5.6269e-04
Loss = 4.0927e-01, PNorm = 54.8458, GNorm = 1.3669, lr_0 = 5.6230e-04
Validation mae = 0.115401
Epoch 9
Loss = 4.4336e-01, PNorm = 54.8598, GNorm = 1.5174, lr_0 = 5.6192e-04
Loss = 3.7481e-01, PNorm = 54.8725, GNorm = 1.3197, lr_0 = 5.6153e-04
Loss = 3.6637e-01, PNorm = 54.8739, GNorm = 1.5498, lr_0 = 5.6115e-04
Loss = 3.7312e-01, PNorm = 54.8849, GNorm = 0.8733, lr_0 = 5.6076e-04
Loss = 3.4219e-01, PNorm = 54.8931, GNorm = 1.3396, lr_0 = 5.6038e-04
Loss = 4.4450e-01, PNorm = 54.8962, GNorm = 1.6375, lr_0 = 5.6000e-04
Loss = 4.1525e-01, PNorm = 54.9021, GNorm = 1.3170, lr_0 = 5.5961e-04
Loss = 4.0965e-01, PNorm = 54.9123, GNorm = 1.1135, lr_0 = 5.5923e-04
Loss = 4.2838e-01, PNorm = 54.9230, GNorm = 1.5853, lr_0 = 5.5885e-04
Loss = 4.4733e-01, PNorm = 54.9290, GNorm = 1.6473, lr_0 = 5.5846e-04
Loss = 4.6080e-01, PNorm = 54.9442, GNorm = 0.9236, lr_0 = 5.5808e-04
Loss = 4.0072e-01, PNorm = 54.9550, GNorm = 1.8130, lr_0 = 5.5770e-04
Loss = 4.5665e-01, PNorm = 54.9694, GNorm = 1.6921, lr_0 = 5.5732e-04
Loss = 4.4014e-01, PNorm = 54.9848, GNorm = 1.8232, lr_0 = 5.5693e-04
Loss = 4.7647e-01, PNorm = 54.9968, GNorm = 1.1001, lr_0 = 5.5655e-04
Loss = 4.4607e-01, PNorm = 55.0087, GNorm = 1.8993, lr_0 = 5.5617e-04
Loss = 3.9021e-01, PNorm = 55.0186, GNorm = 1.0111, lr_0 = 5.5579e-04
Loss = 4.1524e-01, PNorm = 55.0319, GNorm = 1.1661, lr_0 = 5.5541e-04
Loss = 4.9926e-01, PNorm = 55.0435, GNorm = 1.8173, lr_0 = 5.5503e-04
Loss = 4.6386e-01, PNorm = 55.0586, GNorm = 2.2058, lr_0 = 5.5465e-04
Loss = 3.6706e-01, PNorm = 55.0739, GNorm = 1.3796, lr_0 = 5.5427e-04
Loss = 4.2837e-01, PNorm = 55.0845, GNorm = 1.1089, lr_0 = 5.5389e-04
Loss = 4.3319e-01, PNorm = 55.0940, GNorm = 1.2376, lr_0 = 5.5351e-04
Loss = 4.2190e-01, PNorm = 55.1090, GNorm = 1.2630, lr_0 = 5.5313e-04
Loss = 4.2410e-01, PNorm = 55.1295, GNorm = 1.1875, lr_0 = 5.5275e-04
Loss = 4.4870e-01, PNorm = 55.1455, GNorm = 1.1676, lr_0 = 5.5237e-04
Loss = 4.3295e-01, PNorm = 55.1560, GNorm = 1.1268, lr_0 = 5.5199e-04
Loss = 5.2356e-01, PNorm = 55.1711, GNorm = 1.3684, lr_0 = 5.5162e-04
Loss = 3.8683e-01, PNorm = 55.1858, GNorm = 1.0763, lr_0 = 5.5124e-04
Loss = 4.4415e-01, PNorm = 55.1941, GNorm = 1.0916, lr_0 = 5.5086e-04
Loss = 4.5825e-01, PNorm = 55.2038, GNorm = 1.6989, lr_0 = 5.5048e-04
Loss = 3.9529e-01, PNorm = 55.2105, GNorm = 1.0531, lr_0 = 5.5011e-04
Loss = 3.9250e-01, PNorm = 55.2181, GNorm = 1.1408, lr_0 = 5.4973e-04
Loss = 4.0566e-01, PNorm = 55.2250, GNorm = 1.4564, lr_0 = 5.4935e-04
Loss = 4.1418e-01, PNorm = 55.2365, GNorm = 1.6349, lr_0 = 5.4898e-04
Loss = 4.2087e-01, PNorm = 55.2481, GNorm = 0.9944, lr_0 = 5.4860e-04
Loss = 4.4288e-01, PNorm = 55.2606, GNorm = 1.8715, lr_0 = 5.4822e-04
Loss = 4.1496e-01, PNorm = 55.2722, GNorm = 1.7514, lr_0 = 5.4785e-04
Loss = 5.1136e-01, PNorm = 55.2830, GNorm = 1.2509, lr_0 = 5.4747e-04
Loss = 4.3578e-01, PNorm = 55.2904, GNorm = 1.3707, lr_0 = 5.4710e-04
Loss = 4.0941e-01, PNorm = 55.2995, GNorm = 1.5617, lr_0 = 5.4672e-04
Loss = 4.4521e-01, PNorm = 55.3136, GNorm = 2.4002, lr_0 = 5.4635e-04
Loss = 4.0274e-01, PNorm = 55.3265, GNorm = 1.1017, lr_0 = 5.4597e-04
Loss = 4.4718e-01, PNorm = 55.3377, GNorm = 1.3359, lr_0 = 5.4560e-04
Loss = 4.4389e-01, PNorm = 55.3412, GNorm = 1.9066, lr_0 = 5.4523e-04
Loss = 4.1169e-01, PNorm = 55.3535, GNorm = 1.1347, lr_0 = 5.4485e-04
Loss = 4.5859e-01, PNorm = 55.3653, GNorm = 1.2328, lr_0 = 5.4448e-04
Loss = 4.2349e-01, PNorm = 55.3786, GNorm = 1.8463, lr_0 = 5.4411e-04
Loss = 4.2069e-01, PNorm = 55.3927, GNorm = 1.3263, lr_0 = 5.4373e-04
Loss = 4.1928e-01, PNorm = 55.4029, GNorm = 1.4132, lr_0 = 5.4336e-04
Loss = 4.1731e-01, PNorm = 55.4107, GNorm = 1.1404, lr_0 = 5.4299e-04
Loss = 3.7485e-01, PNorm = 55.4206, GNorm = 0.9236, lr_0 = 5.4262e-04
Loss = 4.1458e-01, PNorm = 55.4305, GNorm = 1.3965, lr_0 = 5.4225e-04
Loss = 4.3692e-01, PNorm = 55.4352, GNorm = 1.9358, lr_0 = 5.4187e-04
Loss = 4.3596e-01, PNorm = 55.4423, GNorm = 1.0486, lr_0 = 5.4150e-04
Loss = 3.5871e-01, PNorm = 55.4576, GNorm = 1.1925, lr_0 = 5.4113e-04
Loss = 5.1561e-01, PNorm = 55.4674, GNorm = 1.4493, lr_0 = 5.4076e-04
Loss = 4.3470e-01, PNorm = 55.4796, GNorm = 1.5455, lr_0 = 5.4039e-04
Loss = 4.2340e-01, PNorm = 55.4907, GNorm = 1.4370, lr_0 = 5.4002e-04
Loss = 4.4090e-01, PNorm = 55.5085, GNorm = 1.2687, lr_0 = 5.3965e-04
Loss = 3.5727e-01, PNorm = 55.5193, GNorm = 1.2153, lr_0 = 5.3928e-04
Loss = 3.9905e-01, PNorm = 55.5242, GNorm = 1.4007, lr_0 = 5.3891e-04
Loss = 4.2241e-01, PNorm = 55.5291, GNorm = 1.6002, lr_0 = 5.3854e-04
Loss = 4.6967e-01, PNorm = 55.5391, GNorm = 1.4642, lr_0 = 5.3817e-04
Loss = 4.3522e-01, PNorm = 55.5535, GNorm = 1.3084, lr_0 = 5.3781e-04
Loss = 4.2357e-01, PNorm = 55.5674, GNorm = 1.6523, lr_0 = 5.3744e-04
Loss = 4.3634e-01, PNorm = 55.5783, GNorm = 1.1702, lr_0 = 5.3707e-04
Loss = 3.7926e-01, PNorm = 55.5818, GNorm = 1.4092, lr_0 = 5.3670e-04
Loss = 3.5887e-01, PNorm = 55.5937, GNorm = 1.3288, lr_0 = 5.3633e-04
Loss = 4.1963e-01, PNorm = 55.6051, GNorm = 1.4987, lr_0 = 5.3597e-04
Loss = 3.6458e-01, PNorm = 55.6166, GNorm = 1.1169, lr_0 = 5.3560e-04
Loss = 4.1937e-01, PNorm = 55.6289, GNorm = 1.4026, lr_0 = 5.3523e-04
Loss = 4.1238e-01, PNorm = 55.6404, GNorm = 1.9301, lr_0 = 5.3486e-04
Loss = 4.5748e-01, PNorm = 55.6544, GNorm = 1.4078, lr_0 = 5.3450e-04
Loss = 3.6934e-01, PNorm = 55.6678, GNorm = 1.2282, lr_0 = 5.3413e-04
Loss = 4.7976e-01, PNorm = 55.6751, GNorm = 1.4917, lr_0 = 5.3377e-04
Loss = 4.1701e-01, PNorm = 55.6832, GNorm = 1.7098, lr_0 = 5.3340e-04
Loss = 3.7968e-01, PNorm = 55.6888, GNorm = 1.0358, lr_0 = 5.3304e-04
Loss = 4.9980e-01, PNorm = 55.6997, GNorm = 1.5317, lr_0 = 5.3267e-04
Loss = 4.4166e-01, PNorm = 55.7059, GNorm = 1.2537, lr_0 = 5.3231e-04
Loss = 4.5220e-01, PNorm = 55.7139, GNorm = 2.9581, lr_0 = 5.3194e-04
Loss = 3.9593e-01, PNorm = 55.7240, GNorm = 1.1006, lr_0 = 5.3158e-04
Loss = 4.2110e-01, PNorm = 55.7288, GNorm = 1.1854, lr_0 = 5.3121e-04
Loss = 4.0815e-01, PNorm = 55.7344, GNorm = 1.0003, lr_0 = 5.3085e-04
Loss = 3.9461e-01, PNorm = 55.7463, GNorm = 0.7769, lr_0 = 5.3048e-04
Loss = 4.8593e-01, PNorm = 55.7546, GNorm = 1.1874, lr_0 = 5.3012e-04
Loss = 4.2062e-01, PNorm = 55.7613, GNorm = 2.1996, lr_0 = 5.2976e-04
Loss = 4.4979e-01, PNorm = 55.7648, GNorm = 1.0190, lr_0 = 5.2939e-04
Loss = 4.5726e-01, PNorm = 55.7697, GNorm = 1.8966, lr_0 = 5.2903e-04
Loss = 4.6797e-01, PNorm = 55.7747, GNorm = 1.4143, lr_0 = 5.2867e-04
Loss = 5.0866e-01, PNorm = 55.7874, GNorm = 1.6435, lr_0 = 5.2831e-04
Loss = 4.2286e-01, PNorm = 55.8021, GNorm = 0.8515, lr_0 = 5.2795e-04
Loss = 3.8601e-01, PNorm = 55.8133, GNorm = 1.6170, lr_0 = 5.2758e-04
Loss = 4.8295e-01, PNorm = 55.8195, GNorm = 1.4147, lr_0 = 5.2722e-04
Loss = 3.8064e-01, PNorm = 55.8311, GNorm = 1.4642, lr_0 = 5.2686e-04
Loss = 3.9750e-01, PNorm = 55.8378, GNorm = 1.4317, lr_0 = 5.2650e-04
Loss = 4.4076e-01, PNorm = 55.8496, GNorm = 0.9533, lr_0 = 5.2614e-04
Loss = 4.2119e-01, PNorm = 55.8588, GNorm = 1.3056, lr_0 = 5.2578e-04
Loss = 4.1911e-01, PNorm = 55.8734, GNorm = 1.4502, lr_0 = 5.2542e-04
Loss = 4.1356e-01, PNorm = 55.8814, GNorm = 1.2398, lr_0 = 5.2506e-04
Loss = 4.5375e-01, PNorm = 55.8844, GNorm = 2.0188, lr_0 = 5.2470e-04
Loss = 4.4227e-01, PNorm = 55.8919, GNorm = 2.3382, lr_0 = 5.2434e-04
Loss = 4.1859e-01, PNorm = 55.9008, GNorm = 1.0675, lr_0 = 5.2398e-04
Loss = 4.0957e-01, PNorm = 55.9083, GNorm = 1.0290, lr_0 = 5.2362e-04
Loss = 4.6726e-01, PNorm = 55.9188, GNorm = 1.7788, lr_0 = 5.2326e-04
Loss = 4.2236e-01, PNorm = 55.9283, GNorm = 2.0916, lr_0 = 5.2290e-04
Loss = 4.2168e-01, PNorm = 55.9451, GNorm = 1.5528, lr_0 = 5.2255e-04
Loss = 4.3332e-01, PNorm = 55.9555, GNorm = 1.9166, lr_0 = 5.2219e-04
Loss = 4.0775e-01, PNorm = 55.9715, GNorm = 1.6151, lr_0 = 5.2183e-04
Loss = 4.1758e-01, PNorm = 55.9753, GNorm = 1.1048, lr_0 = 5.2147e-04
Loss = 4.2164e-01, PNorm = 55.9801, GNorm = 1.2256, lr_0 = 5.2112e-04
Loss = 4.4662e-01, PNorm = 55.9864, GNorm = 1.6927, lr_0 = 5.2076e-04
Loss = 4.3125e-01, PNorm = 56.0003, GNorm = 1.9547, lr_0 = 5.2040e-04
Loss = 3.9317e-01, PNorm = 56.0077, GNorm = 0.8953, lr_0 = 5.2005e-04
Loss = 4.2625e-01, PNorm = 56.0164, GNorm = 1.4479, lr_0 = 5.1969e-04
Loss = 4.2149e-01, PNorm = 56.0292, GNorm = 1.1915, lr_0 = 5.1933e-04
Loss = 5.1163e-01, PNorm = 56.0410, GNorm = 1.1411, lr_0 = 5.1898e-04
Loss = 3.7925e-01, PNorm = 56.0511, GNorm = 1.1264, lr_0 = 5.1862e-04
Loss = 3.7414e-01, PNorm = 56.0601, GNorm = 1.1835, lr_0 = 5.1827e-04
Loss = 4.5039e-01, PNorm = 56.0659, GNorm = 1.5724, lr_0 = 5.1791e-04
Validation mae = 0.116165
Epoch 10
Loss = 4.4240e-01, PNorm = 56.0722, GNorm = 1.6010, lr_0 = 5.1756e-04
Loss = 3.7912e-01, PNorm = 56.0823, GNorm = 0.9785, lr_0 = 5.1720e-04
Loss = 3.7055e-01, PNorm = 56.0904, GNorm = 1.7465, lr_0 = 5.1685e-04
Loss = 3.9610e-01, PNorm = 56.1007, GNorm = 0.9790, lr_0 = 5.1649e-04
Loss = 4.1128e-01, PNorm = 56.1070, GNorm = 1.4845, lr_0 = 5.1614e-04
Loss = 4.0701e-01, PNorm = 56.1162, GNorm = 1.2802, lr_0 = 5.1579e-04
Loss = 4.1093e-01, PNorm = 56.1269, GNorm = 1.3434, lr_0 = 5.1543e-04
Loss = 3.9249e-01, PNorm = 56.1396, GNorm = 0.9715, lr_0 = 5.1508e-04
Loss = 3.6196e-01, PNorm = 56.1537, GNorm = 1.2576, lr_0 = 5.1473e-04
Loss = 4.1948e-01, PNorm = 56.1621, GNorm = 1.5312, lr_0 = 5.1437e-04
Loss = 4.3860e-01, PNorm = 56.1749, GNorm = 1.1204, lr_0 = 5.1402e-04
Loss = 4.8884e-01, PNorm = 56.1926, GNorm = 1.7855, lr_0 = 5.1367e-04
Loss = 4.1572e-01, PNorm = 56.2085, GNorm = 1.2834, lr_0 = 5.1332e-04
Loss = 4.1044e-01, PNorm = 56.2203, GNorm = 1.3175, lr_0 = 5.1297e-04
Loss = 4.2517e-01, PNorm = 56.2271, GNorm = 2.0538, lr_0 = 5.1262e-04
Loss = 4.8780e-01, PNorm = 56.2425, GNorm = 1.1638, lr_0 = 5.1226e-04
Loss = 4.3156e-01, PNorm = 56.2581, GNorm = 1.8762, lr_0 = 5.1191e-04
Loss = 3.7175e-01, PNorm = 56.2652, GNorm = 1.4686, lr_0 = 5.1156e-04
Loss = 4.1670e-01, PNorm = 56.2712, GNorm = 0.9498, lr_0 = 5.1121e-04
Loss = 4.5856e-01, PNorm = 56.2712, GNorm = 1.5348, lr_0 = 5.1086e-04
Loss = 4.3373e-01, PNorm = 56.2784, GNorm = 1.5095, lr_0 = 5.1051e-04
Loss = 3.6150e-01, PNorm = 56.2864, GNorm = 1.4870, lr_0 = 5.1016e-04
Loss = 4.2977e-01, PNorm = 56.2962, GNorm = 1.2834, lr_0 = 5.0981e-04
Loss = 4.3083e-01, PNorm = 56.3065, GNorm = 1.5365, lr_0 = 5.0946e-04
Loss = 3.9415e-01, PNorm = 56.3155, GNorm = 1.4742, lr_0 = 5.0911e-04
Loss = 4.5488e-01, PNorm = 56.3293, GNorm = 2.1648, lr_0 = 5.0877e-04
Loss = 3.7403e-01, PNorm = 56.3441, GNorm = 1.5675, lr_0 = 5.0842e-04
Loss = 3.6321e-01, PNorm = 56.3466, GNorm = 0.7886, lr_0 = 5.0807e-04
Loss = 4.5136e-01, PNorm = 56.3543, GNorm = 1.1119, lr_0 = 5.0772e-04
Loss = 3.7686e-01, PNorm = 56.3705, GNorm = 1.4759, lr_0 = 5.0737e-04
Loss = 4.1072e-01, PNorm = 56.3793, GNorm = 1.2684, lr_0 = 5.0703e-04
Loss = 3.7488e-01, PNorm = 56.3916, GNorm = 1.3019, lr_0 = 5.0668e-04
Loss = 4.0953e-01, PNorm = 56.3994, GNorm = 1.4898, lr_0 = 5.0633e-04
Loss = 4.4361e-01, PNorm = 56.4080, GNorm = 1.3208, lr_0 = 5.0598e-04
Loss = 4.2967e-01, PNorm = 56.4190, GNorm = 1.0156, lr_0 = 5.0564e-04
Loss = 4.8994e-01, PNorm = 56.4252, GNorm = 1.5840, lr_0 = 5.0529e-04
Loss = 4.0883e-01, PNorm = 56.4345, GNorm = 1.5266, lr_0 = 5.0494e-04
Loss = 4.1648e-01, PNorm = 56.4411, GNorm = 1.0786, lr_0 = 5.0460e-04
Loss = 4.1309e-01, PNorm = 56.4574, GNorm = 1.0256, lr_0 = 5.0425e-04
Loss = 4.2399e-01, PNorm = 56.4743, GNorm = 1.2301, lr_0 = 5.0391e-04
Loss = 4.3602e-01, PNorm = 56.4884, GNorm = 1.4059, lr_0 = 5.0356e-04
Loss = 4.0299e-01, PNorm = 56.4978, GNorm = 1.6560, lr_0 = 5.0322e-04
Loss = 4.5272e-01, PNorm = 56.5080, GNorm = 1.0735, lr_0 = 5.0287e-04
Loss = 3.8462e-01, PNorm = 56.5164, GNorm = 1.4000, lr_0 = 5.0253e-04
Loss = 3.9037e-01, PNorm = 56.5269, GNorm = 2.0388, lr_0 = 5.0218e-04
Loss = 4.1997e-01, PNorm = 56.5354, GNorm = 1.4041, lr_0 = 5.0184e-04
Loss = 4.8063e-01, PNorm = 56.5435, GNorm = 1.4801, lr_0 = 5.0150e-04
Loss = 4.2032e-01, PNorm = 56.5504, GNorm = 1.7350, lr_0 = 5.0115e-04
Loss = 3.6638e-01, PNorm = 56.5671, GNorm = 1.1736, lr_0 = 5.0081e-04
Loss = 3.5740e-01, PNorm = 56.5783, GNorm = 1.1027, lr_0 = 5.0047e-04
Loss = 3.8587e-01, PNorm = 56.5826, GNorm = 1.2496, lr_0 = 5.0012e-04
Loss = 4.1345e-01, PNorm = 56.5911, GNorm = 1.5198, lr_0 = 4.9978e-04
Loss = 4.6320e-01, PNorm = 56.6011, GNorm = 1.3633, lr_0 = 4.9944e-04
Loss = 4.3641e-01, PNorm = 56.6131, GNorm = 1.2856, lr_0 = 4.9910e-04
Loss = 3.8828e-01, PNorm = 56.6257, GNorm = 0.9355, lr_0 = 4.9875e-04
Loss = 4.4798e-01, PNorm = 56.6387, GNorm = 1.3220, lr_0 = 4.9841e-04
Loss = 4.5323e-01, PNorm = 56.6433, GNorm = 1.5022, lr_0 = 4.9807e-04
Loss = 3.9979e-01, PNorm = 56.6548, GNorm = 1.1934, lr_0 = 4.9773e-04
Loss = 4.1307e-01, PNorm = 56.6644, GNorm = 1.3929, lr_0 = 4.9739e-04
Loss = 3.8721e-01, PNorm = 56.6731, GNorm = 1.4850, lr_0 = 4.9705e-04
Loss = 4.1891e-01, PNorm = 56.6802, GNorm = 1.9372, lr_0 = 4.9671e-04
Loss = 3.9978e-01, PNorm = 56.6888, GNorm = 1.0890, lr_0 = 4.9637e-04
Loss = 4.1335e-01, PNorm = 56.6943, GNorm = 1.3212, lr_0 = 4.9603e-04
Loss = 4.4401e-01, PNorm = 56.7046, GNorm = 1.2147, lr_0 = 4.9569e-04
Loss = 4.0929e-01, PNorm = 56.7146, GNorm = 1.0988, lr_0 = 4.9535e-04
Loss = 4.3909e-01, PNorm = 56.7176, GNorm = 1.4073, lr_0 = 4.9501e-04
Loss = 4.1088e-01, PNorm = 56.7185, GNorm = 1.7664, lr_0 = 4.9467e-04
Loss = 4.0836e-01, PNorm = 56.7281, GNorm = 1.1266, lr_0 = 4.9433e-04
Loss = 4.5137e-01, PNorm = 56.7370, GNorm = 1.1587, lr_0 = 4.9399e-04
Loss = 4.2039e-01, PNorm = 56.7463, GNorm = 1.1499, lr_0 = 4.9365e-04
Loss = 3.9886e-01, PNorm = 56.7526, GNorm = 2.0526, lr_0 = 4.9332e-04
Loss = 3.9169e-01, PNorm = 56.7625, GNorm = 1.3004, lr_0 = 4.9298e-04
Loss = 4.2254e-01, PNorm = 56.7709, GNorm = 1.4797, lr_0 = 4.9264e-04
Loss = 4.1656e-01, PNorm = 56.7807, GNorm = 1.2500, lr_0 = 4.9230e-04
Loss = 3.9167e-01, PNorm = 56.7879, GNorm = 1.4112, lr_0 = 4.9197e-04
Loss = 4.0710e-01, PNorm = 56.7958, GNorm = 1.1749, lr_0 = 4.9163e-04
Loss = 4.4157e-01, PNorm = 56.8022, GNorm = 0.9658, lr_0 = 4.9129e-04
Loss = 3.8060e-01, PNorm = 56.8071, GNorm = 1.5413, lr_0 = 4.9095e-04
Loss = 4.0788e-01, PNorm = 56.8109, GNorm = 1.2847, lr_0 = 4.9062e-04
Loss = 4.3165e-01, PNorm = 56.8207, GNorm = 1.4934, lr_0 = 4.9028e-04
Loss = 3.8364e-01, PNorm = 56.8320, GNorm = 1.7408, lr_0 = 4.8995e-04
Loss = 4.8348e-01, PNorm = 56.8405, GNorm = 1.3577, lr_0 = 4.8961e-04
Loss = 3.9936e-01, PNorm = 56.8471, GNorm = 1.4183, lr_0 = 4.8928e-04
Loss = 3.9784e-01, PNorm = 56.8517, GNorm = 1.1253, lr_0 = 4.8894e-04
Loss = 3.9490e-01, PNorm = 56.8643, GNorm = 1.2253, lr_0 = 4.8861e-04
Loss = 4.1758e-01, PNorm = 56.8721, GNorm = 1.1958, lr_0 = 4.8827e-04
Loss = 4.4390e-01, PNorm = 56.8879, GNorm = 1.5066, lr_0 = 4.8794e-04
Loss = 4.2104e-01, PNorm = 56.8986, GNorm = 2.4751, lr_0 = 4.8760e-04
Loss = 4.0508e-01, PNorm = 56.9107, GNorm = 1.4931, lr_0 = 4.8727e-04
Loss = 4.8347e-01, PNorm = 56.9180, GNorm = 1.1710, lr_0 = 4.8693e-04
Loss = 4.0182e-01, PNorm = 56.9271, GNorm = 1.1808, lr_0 = 4.8660e-04
Loss = 4.5346e-01, PNorm = 56.9360, GNorm = 1.0835, lr_0 = 4.8627e-04
Loss = 4.0693e-01, PNorm = 56.9402, GNorm = 1.2647, lr_0 = 4.8593e-04
Loss = 4.3087e-01, PNorm = 56.9521, GNorm = 1.4348, lr_0 = 4.8560e-04
Loss = 3.9009e-01, PNorm = 56.9602, GNorm = 1.3542, lr_0 = 4.8527e-04
Loss = 4.3101e-01, PNorm = 56.9764, GNorm = 1.3889, lr_0 = 4.8494e-04
Loss = 4.2167e-01, PNorm = 56.9845, GNorm = 1.2874, lr_0 = 4.8460e-04
Loss = 4.3448e-01, PNorm = 56.9965, GNorm = 1.1489, lr_0 = 4.8427e-04
Loss = 4.6708e-01, PNorm = 57.0033, GNorm = 1.6874, lr_0 = 4.8394e-04
Loss = 3.8333e-01, PNorm = 57.0150, GNorm = 1.5037, lr_0 = 4.8361e-04
Loss = 4.0734e-01, PNorm = 57.0193, GNorm = 1.4670, lr_0 = 4.8328e-04
Loss = 4.7106e-01, PNorm = 57.0278, GNorm = 1.1364, lr_0 = 4.8295e-04
Loss = 4.0277e-01, PNorm = 57.0329, GNorm = 1.2485, lr_0 = 4.8262e-04
Loss = 4.2354e-01, PNorm = 57.0504, GNorm = 1.2863, lr_0 = 4.8228e-04
Loss = 4.7170e-01, PNorm = 57.0594, GNorm = 1.2276, lr_0 = 4.8195e-04
Loss = 4.3737e-01, PNorm = 57.0676, GNorm = 1.3731, lr_0 = 4.8162e-04
Loss = 4.1525e-01, PNorm = 57.0711, GNorm = 1.4284, lr_0 = 4.8129e-04
Loss = 4.2323e-01, PNorm = 57.0836, GNorm = 2.5929, lr_0 = 4.8096e-04
Loss = 3.6462e-01, PNorm = 57.0863, GNorm = 0.8508, lr_0 = 4.8064e-04
Loss = 4.7466e-01, PNorm = 57.0969, GNorm = 1.2986, lr_0 = 4.8031e-04
Loss = 3.8803e-01, PNorm = 57.1095, GNorm = 1.5162, lr_0 = 4.7998e-04
Loss = 4.2775e-01, PNorm = 57.1214, GNorm = 1.4284, lr_0 = 4.7965e-04
Loss = 4.2655e-01, PNorm = 57.1333, GNorm = 1.8089, lr_0 = 4.7932e-04
Loss = 4.2845e-01, PNorm = 57.1408, GNorm = 1.2946, lr_0 = 4.7899e-04
Loss = 4.4410e-01, PNorm = 57.1487, GNorm = 1.6769, lr_0 = 4.7866e-04
Loss = 4.2064e-01, PNorm = 57.1578, GNorm = 1.1190, lr_0 = 4.7833e-04
Loss = 4.7636e-01, PNorm = 57.1654, GNorm = 1.3363, lr_0 = 4.7801e-04
Loss = 4.4078e-01, PNorm = 57.1737, GNorm = 1.2657, lr_0 = 4.7768e-04
Loss = 4.1029e-01, PNorm = 57.1858, GNorm = 1.1711, lr_0 = 4.7735e-04
Loss = 3.3818e-01, PNorm = 57.1928, GNorm = 1.2032, lr_0 = 4.7703e-04
Validation mae = 0.115394
Epoch 11
Loss = 4.2659e-01, PNorm = 57.2016, GNorm = 1.2036, lr_0 = 4.7670e-04
Loss = 3.8983e-01, PNorm = 57.2060, GNorm = 1.4010, lr_0 = 4.7637e-04
Loss = 5.0280e-01, PNorm = 57.2143, GNorm = 1.7704, lr_0 = 4.7605e-04
Loss = 4.5228e-01, PNorm = 57.2251, GNorm = 1.6811, lr_0 = 4.7572e-04
Loss = 4.4534e-01, PNorm = 57.2343, GNorm = 1.1113, lr_0 = 4.7539e-04
Loss = 3.8750e-01, PNorm = 57.2373, GNorm = 1.5890, lr_0 = 4.7507e-04
Loss = 5.2022e-01, PNorm = 57.2450, GNorm = 1.0021, lr_0 = 4.7474e-04
Loss = 3.6291e-01, PNorm = 57.2575, GNorm = 1.2952, lr_0 = 4.7442e-04
Loss = 3.6245e-01, PNorm = 57.2634, GNorm = 1.0727, lr_0 = 4.7409e-04
Loss = 4.0335e-01, PNorm = 57.2702, GNorm = 1.9074, lr_0 = 4.7377e-04
Loss = 3.9781e-01, PNorm = 57.2778, GNorm = 1.0054, lr_0 = 4.7344e-04
Loss = 3.8226e-01, PNorm = 57.2826, GNorm = 2.1015, lr_0 = 4.7312e-04
Loss = 4.1612e-01, PNorm = 57.2903, GNorm = 2.4507, lr_0 = 4.7279e-04
Loss = 4.0896e-01, PNorm = 57.2957, GNorm = 1.5728, lr_0 = 4.7247e-04
Loss = 4.0132e-01, PNorm = 57.3093, GNorm = 1.1587, lr_0 = 4.7215e-04
Loss = 4.7409e-01, PNorm = 57.3197, GNorm = 1.5805, lr_0 = 4.7182e-04
Loss = 4.0817e-01, PNorm = 57.3277, GNorm = 2.0037, lr_0 = 4.7150e-04
Loss = 3.9581e-01, PNorm = 57.3284, GNorm = 1.4481, lr_0 = 4.7118e-04
Loss = 4.1480e-01, PNorm = 57.3331, GNorm = 1.2472, lr_0 = 4.7085e-04
Loss = 3.9564e-01, PNorm = 57.3428, GNorm = 1.4925, lr_0 = 4.7053e-04
Loss = 4.0412e-01, PNorm = 57.3567, GNorm = 1.6616, lr_0 = 4.7021e-04
Loss = 4.0767e-01, PNorm = 57.3619, GNorm = 1.2963, lr_0 = 4.6989e-04
Loss = 4.0211e-01, PNorm = 57.3697, GNorm = 0.9421, lr_0 = 4.6957e-04
Loss = 4.2465e-01, PNorm = 57.3788, GNorm = 1.7800, lr_0 = 4.6924e-04
Loss = 3.6361e-01, PNorm = 57.3881, GNorm = 1.4499, lr_0 = 4.6892e-04
Loss = 3.7088e-01, PNorm = 57.3957, GNorm = 2.5265, lr_0 = 4.6860e-04
Loss = 4.4902e-01, PNorm = 57.4026, GNorm = 1.4509, lr_0 = 4.6828e-04
Loss = 3.5631e-01, PNorm = 57.4127, GNorm = 1.5103, lr_0 = 4.6796e-04
Loss = 4.4631e-01, PNorm = 57.4199, GNorm = 1.7802, lr_0 = 4.6764e-04
Loss = 4.1330e-01, PNorm = 57.4326, GNorm = 1.5790, lr_0 = 4.6732e-04
Loss = 4.2974e-01, PNorm = 57.4422, GNorm = 1.3128, lr_0 = 4.6700e-04
Loss = 4.4285e-01, PNorm = 57.4542, GNorm = 1.7102, lr_0 = 4.6668e-04
Loss = 4.3272e-01, PNorm = 57.4614, GNorm = 1.3215, lr_0 = 4.6636e-04
Loss = 3.9264e-01, PNorm = 57.4735, GNorm = 1.4578, lr_0 = 4.6604e-04
Loss = 3.7020e-01, PNorm = 57.4815, GNorm = 1.2612, lr_0 = 4.6572e-04
Loss = 4.2420e-01, PNorm = 57.4909, GNorm = 1.1508, lr_0 = 4.6540e-04
Loss = 3.9958e-01, PNorm = 57.4957, GNorm = 1.3218, lr_0 = 4.6508e-04
Loss = 3.7392e-01, PNorm = 57.5030, GNorm = 1.4111, lr_0 = 4.6476e-04
Loss = 4.2929e-01, PNorm = 57.5089, GNorm = 1.0435, lr_0 = 4.6445e-04
Loss = 4.4487e-01, PNorm = 57.5185, GNorm = 1.3356, lr_0 = 4.6413e-04
Loss = 3.7717e-01, PNorm = 57.5276, GNorm = 0.8737, lr_0 = 4.6381e-04
Loss = 3.5770e-01, PNorm = 57.5356, GNorm = 1.3588, lr_0 = 4.6349e-04
Loss = 3.5661e-01, PNorm = 57.5407, GNorm = 1.2465, lr_0 = 4.6317e-04
Loss = 3.8528e-01, PNorm = 57.5441, GNorm = 1.1555, lr_0 = 4.6286e-04
Loss = 3.7850e-01, PNorm = 57.5505, GNorm = 1.5105, lr_0 = 4.6254e-04
Loss = 3.5436e-01, PNorm = 57.5555, GNorm = 1.4076, lr_0 = 4.6222e-04
Loss = 4.2571e-01, PNorm = 57.5663, GNorm = 1.5878, lr_0 = 4.6191e-04
Loss = 4.4357e-01, PNorm = 57.5718, GNorm = 1.4453, lr_0 = 4.6159e-04
Loss = 3.6786e-01, PNorm = 57.5800, GNorm = 1.0292, lr_0 = 4.6127e-04
Loss = 4.3944e-01, PNorm = 57.5843, GNorm = 1.4574, lr_0 = 4.6096e-04
Loss = 3.9131e-01, PNorm = 57.5940, GNorm = 2.3097, lr_0 = 4.6064e-04
Loss = 4.1654e-01, PNorm = 57.5953, GNorm = 2.0057, lr_0 = 4.6033e-04
Loss = 4.6829e-01, PNorm = 57.6071, GNorm = 1.4507, lr_0 = 4.6001e-04
Loss = 4.5298e-01, PNorm = 57.6183, GNorm = 1.5704, lr_0 = 4.5970e-04
Loss = 4.2632e-01, PNorm = 57.6280, GNorm = 1.5963, lr_0 = 4.5938e-04
Loss = 3.8328e-01, PNorm = 57.6361, GNorm = 2.1606, lr_0 = 4.5907e-04
Loss = 3.9613e-01, PNorm = 57.6439, GNorm = 1.4224, lr_0 = 4.5875e-04
Loss = 3.9834e-01, PNorm = 57.6504, GNorm = 1.2440, lr_0 = 4.5844e-04
Loss = 4.4189e-01, PNorm = 57.6594, GNorm = 1.8634, lr_0 = 4.5812e-04
Loss = 3.8828e-01, PNorm = 57.6649, GNorm = 1.4914, lr_0 = 4.5781e-04
Loss = 3.7251e-01, PNorm = 57.6714, GNorm = 1.1436, lr_0 = 4.5750e-04
Loss = 4.0284e-01, PNorm = 57.6793, GNorm = 1.1545, lr_0 = 4.5718e-04
Loss = 3.9139e-01, PNorm = 57.6918, GNorm = 1.5551, lr_0 = 4.5687e-04
Loss = 4.1889e-01, PNorm = 57.6996, GNorm = 1.6868, lr_0 = 4.5656e-04
Loss = 3.7915e-01, PNorm = 57.7088, GNorm = 1.5914, lr_0 = 4.5624e-04
Loss = 4.0376e-01, PNorm = 57.7120, GNorm = 1.7073, lr_0 = 4.5593e-04
Loss = 4.1355e-01, PNorm = 57.7201, GNorm = 1.7465, lr_0 = 4.5562e-04
Loss = 4.2835e-01, PNorm = 57.7291, GNorm = 1.2187, lr_0 = 4.5531e-04
Loss = 4.0742e-01, PNorm = 57.7337, GNorm = 0.9854, lr_0 = 4.5499e-04
Loss = 3.5911e-01, PNorm = 57.7391, GNorm = 1.6416, lr_0 = 4.5468e-04
Loss = 3.8547e-01, PNorm = 57.7459, GNorm = 1.6039, lr_0 = 4.5437e-04
Loss = 4.7161e-01, PNorm = 57.7521, GNorm = 1.6684, lr_0 = 4.5406e-04
Loss = 3.6518e-01, PNorm = 57.7644, GNorm = 1.1675, lr_0 = 4.5375e-04
Loss = 4.5518e-01, PNorm = 57.7722, GNorm = 0.9771, lr_0 = 4.5344e-04
Loss = 4.9715e-01, PNorm = 57.7812, GNorm = 1.5772, lr_0 = 4.5313e-04
Loss = 4.0455e-01, PNorm = 57.7906, GNorm = 2.1521, lr_0 = 4.5282e-04
Loss = 3.7878e-01, PNorm = 57.7964, GNorm = 1.7188, lr_0 = 4.5251e-04
Loss = 4.0212e-01, PNorm = 57.7988, GNorm = 1.6497, lr_0 = 4.5220e-04
Loss = 4.2283e-01, PNorm = 57.8041, GNorm = 2.1666, lr_0 = 4.5189e-04
Loss = 4.6355e-01, PNorm = 57.8093, GNorm = 1.9475, lr_0 = 4.5158e-04
Loss = 4.0703e-01, PNorm = 57.8245, GNorm = 1.0759, lr_0 = 4.5127e-04
Loss = 4.4916e-01, PNorm = 57.8305, GNorm = 2.5311, lr_0 = 4.5096e-04
Loss = 3.9392e-01, PNorm = 57.8439, GNorm = 1.1638, lr_0 = 4.5065e-04
Loss = 4.1641e-01, PNorm = 57.8536, GNorm = 1.7408, lr_0 = 4.5034e-04
Loss = 4.2619e-01, PNorm = 57.8609, GNorm = 1.2800, lr_0 = 4.5003e-04
Loss = 4.3941e-01, PNorm = 57.8639, GNorm = 2.0283, lr_0 = 4.4972e-04
Loss = 3.7970e-01, PNorm = 57.8701, GNorm = 1.4837, lr_0 = 4.4942e-04
Loss = 3.6570e-01, PNorm = 57.8802, GNorm = 1.4655, lr_0 = 4.4911e-04
Loss = 3.9891e-01, PNorm = 57.8860, GNorm = 1.3824, lr_0 = 4.4880e-04
Loss = 3.7211e-01, PNorm = 57.8879, GNorm = 1.0685, lr_0 = 4.4849e-04
Loss = 3.8382e-01, PNorm = 57.8959, GNorm = 1.4521, lr_0 = 4.4819e-04
Loss = 3.9244e-01, PNorm = 57.9071, GNorm = 1.5277, lr_0 = 4.4788e-04
Loss = 3.6956e-01, PNorm = 57.9109, GNorm = 1.1471, lr_0 = 4.4757e-04
Loss = 3.3323e-01, PNorm = 57.9152, GNorm = 1.2831, lr_0 = 4.4727e-04
Loss = 4.1048e-01, PNorm = 57.9203, GNorm = 1.2464, lr_0 = 4.4696e-04
Loss = 3.4868e-01, PNorm = 57.9264, GNorm = 1.2358, lr_0 = 4.4665e-04
Loss = 4.1752e-01, PNorm = 57.9348, GNorm = 1.3299, lr_0 = 4.4635e-04
Loss = 3.7257e-01, PNorm = 57.9416, GNorm = 1.3384, lr_0 = 4.4604e-04
Loss = 4.0508e-01, PNorm = 57.9512, GNorm = 0.8995, lr_0 = 4.4574e-04
Loss = 4.6960e-01, PNorm = 57.9601, GNorm = 2.4165, lr_0 = 4.4543e-04
Loss = 3.7913e-01, PNorm = 57.9684, GNorm = 1.4858, lr_0 = 4.4513e-04
Loss = 4.5389e-01, PNorm = 57.9829, GNorm = 1.8221, lr_0 = 4.4482e-04
Loss = 3.6317e-01, PNorm = 57.9933, GNorm = 1.3862, lr_0 = 4.4452e-04
Loss = 4.0971e-01, PNorm = 57.9963, GNorm = 1.8075, lr_0 = 4.4421e-04
Loss = 4.1188e-01, PNorm = 58.0009, GNorm = 1.4049, lr_0 = 4.4391e-04
Loss = 4.4748e-01, PNorm = 58.0001, GNorm = 1.4828, lr_0 = 4.4360e-04
Loss = 4.5885e-01, PNorm = 58.0042, GNorm = 1.8752, lr_0 = 4.4330e-04
Loss = 4.2178e-01, PNorm = 58.0130, GNorm = 1.5707, lr_0 = 4.4299e-04
Loss = 4.7994e-01, PNorm = 58.0264, GNorm = 0.9863, lr_0 = 4.4269e-04
Loss = 3.5736e-01, PNorm = 58.0364, GNorm = 1.1826, lr_0 = 4.4239e-04
Loss = 4.6917e-01, PNorm = 58.0421, GNorm = 0.9108, lr_0 = 4.4209e-04
Loss = 4.1123e-01, PNorm = 58.0481, GNorm = 1.2564, lr_0 = 4.4178e-04
Loss = 3.7462e-01, PNorm = 58.0560, GNorm = 1.1241, lr_0 = 4.4148e-04
Loss = 4.1525e-01, PNorm = 58.0653, GNorm = 1.2651, lr_0 = 4.4118e-04
Loss = 3.6065e-01, PNorm = 58.0735, GNorm = 1.1201, lr_0 = 4.4088e-04
Loss = 4.0752e-01, PNorm = 58.0832, GNorm = 1.1364, lr_0 = 4.4057e-04
Loss = 3.9370e-01, PNorm = 58.0902, GNorm = 1.4770, lr_0 = 4.4027e-04
Loss = 3.9199e-01, PNorm = 58.1021, GNorm = 1.2323, lr_0 = 4.3997e-04
Loss = 3.8355e-01, PNorm = 58.1126, GNorm = 1.1842, lr_0 = 4.3967e-04
Loss = 3.9470e-01, PNorm = 58.1152, GNorm = 1.2774, lr_0 = 4.3937e-04
Validation mae = 0.115487
Epoch 12
Loss = 3.6708e-01, PNorm = 58.1217, GNorm = 0.9073, lr_0 = 4.3907e-04
Loss = 4.6110e-01, PNorm = 58.1299, GNorm = 2.6173, lr_0 = 4.3877e-04
Loss = 4.8681e-01, PNorm = 58.1356, GNorm = 1.2423, lr_0 = 4.3846e-04
Loss = 3.7638e-01, PNorm = 58.1441, GNorm = 0.9643, lr_0 = 4.3816e-04
Loss = 3.5944e-01, PNorm = 58.1502, GNorm = 2.5296, lr_0 = 4.3786e-04
Loss = 4.3510e-01, PNorm = 58.1534, GNorm = 1.6964, lr_0 = 4.3756e-04
Loss = 4.0948e-01, PNorm = 58.1603, GNorm = 0.9514, lr_0 = 4.3726e-04
Loss = 3.7299e-01, PNorm = 58.1649, GNorm = 1.5006, lr_0 = 4.3696e-04
Loss = 3.7002e-01, PNorm = 58.1721, GNorm = 1.2666, lr_0 = 4.3667e-04
Loss = 4.2058e-01, PNorm = 58.1746, GNorm = 1.1682, lr_0 = 4.3637e-04
Loss = 3.9666e-01, PNorm = 58.1778, GNorm = 1.5532, lr_0 = 4.3607e-04
Loss = 4.1133e-01, PNorm = 58.1840, GNorm = 1.8404, lr_0 = 4.3577e-04
Loss = 3.7941e-01, PNorm = 58.1931, GNorm = 1.4080, lr_0 = 4.3547e-04
Loss = 4.0698e-01, PNorm = 58.2016, GNorm = 1.5308, lr_0 = 4.3517e-04
Loss = 4.2822e-01, PNorm = 58.2107, GNorm = 1.4861, lr_0 = 4.3487e-04
Loss = 3.6399e-01, PNorm = 58.2185, GNorm = 1.3394, lr_0 = 4.3458e-04
Loss = 3.5433e-01, PNorm = 58.2212, GNorm = 1.3753, lr_0 = 4.3428e-04
Loss = 3.8904e-01, PNorm = 58.2286, GNorm = 1.3428, lr_0 = 4.3398e-04
Loss = 3.6461e-01, PNorm = 58.2329, GNorm = 1.1336, lr_0 = 4.3368e-04
Loss = 3.5911e-01, PNorm = 58.2407, GNorm = 1.5803, lr_0 = 4.3339e-04
Loss = 3.3120e-01, PNorm = 58.2443, GNorm = 1.0737, lr_0 = 4.3309e-04
Loss = 4.0835e-01, PNorm = 58.2509, GNorm = 1.1120, lr_0 = 4.3279e-04
Loss = 3.6531e-01, PNorm = 58.2588, GNorm = 1.5747, lr_0 = 4.3250e-04
Loss = 3.9211e-01, PNorm = 58.2682, GNorm = 1.6468, lr_0 = 4.3220e-04
Loss = 4.2345e-01, PNorm = 58.2816, GNorm = 1.6847, lr_0 = 4.3190e-04
Loss = 4.0447e-01, PNorm = 58.2940, GNorm = 1.1654, lr_0 = 4.3161e-04
Loss = 4.1213e-01, PNorm = 58.3076, GNorm = 2.0869, lr_0 = 4.3131e-04
Loss = 3.6810e-01, PNorm = 58.3162, GNorm = 1.7608, lr_0 = 4.3102e-04
Loss = 4.6632e-01, PNorm = 58.3246, GNorm = 0.9971, lr_0 = 4.3072e-04
Loss = 3.8150e-01, PNorm = 58.3338, GNorm = 1.1548, lr_0 = 4.3043e-04
Loss = 3.6328e-01, PNorm = 58.3350, GNorm = 1.2545, lr_0 = 4.3013e-04
Loss = 4.3614e-01, PNorm = 58.3397, GNorm = 1.0587, lr_0 = 4.2984e-04
Loss = 3.7911e-01, PNorm = 58.3474, GNorm = 1.0680, lr_0 = 4.2954e-04
Loss = 3.7837e-01, PNorm = 58.3602, GNorm = 1.1179, lr_0 = 4.2925e-04
Loss = 3.6576e-01, PNorm = 58.3691, GNorm = 1.4581, lr_0 = 4.2895e-04
Loss = 3.9812e-01, PNorm = 58.3791, GNorm = 0.9360, lr_0 = 4.2866e-04
Loss = 4.2620e-01, PNorm = 58.3881, GNorm = 1.2016, lr_0 = 4.2837e-04
Loss = 4.3624e-01, PNorm = 58.3922, GNorm = 1.2018, lr_0 = 4.2807e-04
Loss = 4.0267e-01, PNorm = 58.3983, GNorm = 2.1572, lr_0 = 4.2778e-04
Loss = 3.7759e-01, PNorm = 58.4076, GNorm = 2.0948, lr_0 = 4.2749e-04
Loss = 3.8833e-01, PNorm = 58.4117, GNorm = 1.1466, lr_0 = 4.2719e-04
Loss = 4.3055e-01, PNorm = 58.4219, GNorm = 1.2656, lr_0 = 4.2690e-04
Loss = 3.9400e-01, PNorm = 58.4292, GNorm = 1.3742, lr_0 = 4.2661e-04
Loss = 4.2252e-01, PNorm = 58.4409, GNorm = 1.6946, lr_0 = 4.2632e-04
Loss = 4.0669e-01, PNorm = 58.4468, GNorm = 1.4739, lr_0 = 4.2602e-04
Loss = 3.9023e-01, PNorm = 58.4553, GNorm = 1.1747, lr_0 = 4.2573e-04
Loss = 3.9069e-01, PNorm = 58.4648, GNorm = 0.9652, lr_0 = 4.2544e-04
Loss = 4.0081e-01, PNorm = 58.4714, GNorm = 1.6468, lr_0 = 4.2515e-04
Loss = 3.6695e-01, PNorm = 58.4780, GNorm = 1.6215, lr_0 = 4.2486e-04
Loss = 4.3035e-01, PNorm = 58.4838, GNorm = 2.0190, lr_0 = 4.2457e-04
Loss = 3.9715e-01, PNorm = 58.4873, GNorm = 1.1268, lr_0 = 4.2428e-04
Loss = 4.6171e-01, PNorm = 58.4945, GNorm = 2.0707, lr_0 = 4.2399e-04
Loss = 3.9463e-01, PNorm = 58.5074, GNorm = 1.1685, lr_0 = 4.2370e-04
Loss = 4.0239e-01, PNorm = 58.5217, GNorm = 1.3454, lr_0 = 4.2340e-04
Loss = 3.4848e-01, PNorm = 58.5375, GNorm = 1.1154, lr_0 = 4.2311e-04
Loss = 3.9282e-01, PNorm = 58.5487, GNorm = 1.4733, lr_0 = 4.2283e-04
Loss = 3.8962e-01, PNorm = 58.5515, GNorm = 1.1645, lr_0 = 4.2254e-04
Loss = 4.1632e-01, PNorm = 58.5559, GNorm = 1.8993, lr_0 = 4.2225e-04
Loss = 3.6169e-01, PNorm = 58.5589, GNorm = 1.4097, lr_0 = 4.2196e-04
Loss = 3.5660e-01, PNorm = 58.5659, GNorm = 1.3827, lr_0 = 4.2167e-04
Loss = 4.2858e-01, PNorm = 58.5690, GNorm = 1.7298, lr_0 = 4.2138e-04
Loss = 3.8581e-01, PNorm = 58.5773, GNorm = 1.6590, lr_0 = 4.2109e-04
Loss = 4.0297e-01, PNorm = 58.5822, GNorm = 1.0195, lr_0 = 4.2080e-04
Loss = 4.0172e-01, PNorm = 58.5926, GNorm = 1.4686, lr_0 = 4.2051e-04
Loss = 3.8620e-01, PNorm = 58.6040, GNorm = 1.2362, lr_0 = 4.2023e-04
Loss = 4.1931e-01, PNorm = 58.6127, GNorm = 0.8821, lr_0 = 4.1994e-04
Loss = 4.0653e-01, PNorm = 58.6169, GNorm = 1.0813, lr_0 = 4.1965e-04
Loss = 3.5781e-01, PNorm = 58.6212, GNorm = 1.3078, lr_0 = 4.1936e-04
Loss = 3.8090e-01, PNorm = 58.6248, GNorm = 1.2384, lr_0 = 4.1907e-04
Loss = 4.1522e-01, PNorm = 58.6282, GNorm = 1.1305, lr_0 = 4.1879e-04
Loss = 3.8172e-01, PNorm = 58.6349, GNorm = 1.3936, lr_0 = 4.1850e-04
Loss = 4.4278e-01, PNorm = 58.6336, GNorm = 1.4897, lr_0 = 4.1821e-04
Loss = 5.2413e-01, PNorm = 58.6390, GNorm = 2.6586, lr_0 = 4.1793e-04
Loss = 4.0199e-01, PNorm = 58.6389, GNorm = 1.2369, lr_0 = 4.1764e-04
Loss = 4.1225e-01, PNorm = 58.6477, GNorm = 1.4109, lr_0 = 4.1736e-04
Loss = 4.3879e-01, PNorm = 58.6532, GNorm = 1.4131, lr_0 = 4.1707e-04
Loss = 4.0628e-01, PNorm = 58.6585, GNorm = 1.0279, lr_0 = 4.1678e-04
Loss = 3.8777e-01, PNorm = 58.6634, GNorm = 1.5252, lr_0 = 4.1650e-04
Loss = 4.0959e-01, PNorm = 58.6759, GNorm = 1.5570, lr_0 = 4.1621e-04
Loss = 3.9760e-01, PNorm = 58.6786, GNorm = 1.6456, lr_0 = 4.1593e-04
Loss = 3.9788e-01, PNorm = 58.6854, GNorm = 2.1235, lr_0 = 4.1564e-04
Loss = 4.2115e-01, PNorm = 58.6919, GNorm = 1.8679, lr_0 = 4.1536e-04
Loss = 3.9215e-01, PNorm = 58.6966, GNorm = 1.2589, lr_0 = 4.1507e-04
Loss = 3.7813e-01, PNorm = 58.6983, GNorm = 1.2501, lr_0 = 4.1479e-04
Loss = 3.4842e-01, PNorm = 58.7023, GNorm = 1.6143, lr_0 = 4.1450e-04
Loss = 4.5923e-01, PNorm = 58.7085, GNorm = 1.5986, lr_0 = 4.1422e-04
Loss = 3.5170e-01, PNorm = 58.7188, GNorm = 0.9051, lr_0 = 4.1394e-04
Loss = 4.2579e-01, PNorm = 58.7254, GNorm = 1.1166, lr_0 = 4.1365e-04
Loss = 3.7133e-01, PNorm = 58.7309, GNorm = 1.4577, lr_0 = 4.1337e-04
Loss = 4.3502e-01, PNorm = 58.7324, GNorm = 1.2446, lr_0 = 4.1309e-04
Loss = 4.3690e-01, PNorm = 58.7392, GNorm = 1.6177, lr_0 = 4.1280e-04
Loss = 3.7613e-01, PNorm = 58.7452, GNorm = 1.2589, lr_0 = 4.1252e-04
Loss = 4.6897e-01, PNorm = 58.7533, GNorm = 1.5520, lr_0 = 4.1224e-04
Loss = 4.1076e-01, PNorm = 58.7584, GNorm = 1.9455, lr_0 = 4.1196e-04
Loss = 4.2354e-01, PNorm = 58.7671, GNorm = 2.3341, lr_0 = 4.1167e-04
Loss = 4.0403e-01, PNorm = 58.7735, GNorm = 1.3696, lr_0 = 4.1139e-04
Loss = 4.0402e-01, PNorm = 58.7809, GNorm = 1.0995, lr_0 = 4.1111e-04
Loss = 4.3652e-01, PNorm = 58.7888, GNorm = 1.7107, lr_0 = 4.1083e-04
Loss = 4.4348e-01, PNorm = 58.7982, GNorm = 1.1636, lr_0 = 4.1055e-04
Loss = 3.5236e-01, PNorm = 58.8049, GNorm = 1.3140, lr_0 = 4.1027e-04
Loss = 4.4534e-01, PNorm = 58.8182, GNorm = 1.7760, lr_0 = 4.0998e-04
Loss = 3.8701e-01, PNorm = 58.8275, GNorm = 1.4426, lr_0 = 4.0970e-04
Loss = 4.2689e-01, PNorm = 58.8344, GNorm = 1.2988, lr_0 = 4.0942e-04
Loss = 4.6865e-01, PNorm = 58.8376, GNorm = 1.0102, lr_0 = 4.0914e-04
Loss = 3.8039e-01, PNorm = 58.8455, GNorm = 1.9759, lr_0 = 4.0886e-04
Loss = 3.8430e-01, PNorm = 58.8501, GNorm = 1.6460, lr_0 = 4.0858e-04
Loss = 4.5370e-01, PNorm = 58.8604, GNorm = 1.6252, lr_0 = 4.0830e-04
Loss = 4.0317e-01, PNorm = 58.8670, GNorm = 1.2088, lr_0 = 4.0802e-04
Loss = 3.9554e-01, PNorm = 58.8742, GNorm = 1.2553, lr_0 = 4.0774e-04
Loss = 3.9792e-01, PNorm = 58.8836, GNorm = 1.2281, lr_0 = 4.0746e-04
Loss = 3.7850e-01, PNorm = 58.8874, GNorm = 1.5465, lr_0 = 4.0718e-04
Loss = 4.1971e-01, PNorm = 58.8886, GNorm = 1.5245, lr_0 = 4.0691e-04
Loss = 3.6780e-01, PNorm = 58.8931, GNorm = 1.4174, lr_0 = 4.0663e-04
Loss = 4.4342e-01, PNorm = 58.8978, GNorm = 1.7579, lr_0 = 4.0635e-04
Loss = 4.1961e-01, PNorm = 58.9024, GNorm = 1.3881, lr_0 = 4.0607e-04
Loss = 3.7475e-01, PNorm = 58.9078, GNorm = 1.5313, lr_0 = 4.0579e-04
Loss = 4.2151e-01, PNorm = 58.9167, GNorm = 1.7221, lr_0 = 4.0551e-04
Loss = 4.1100e-01, PNorm = 58.9222, GNorm = 1.8724, lr_0 = 4.0524e-04
Loss = 3.8273e-01, PNorm = 58.9321, GNorm = 1.0838, lr_0 = 4.0496e-04
Loss = 3.8309e-01, PNorm = 58.9361, GNorm = 1.6410, lr_0 = 4.0468e-04
Validation mae = 0.113704
Epoch 13
Loss = 3.8121e-01, PNorm = 58.9376, GNorm = 1.5718, lr_0 = 4.0440e-04
Loss = 3.9880e-01, PNorm = 58.9434, GNorm = 1.4532, lr_0 = 4.0413e-04
Loss = 3.5770e-01, PNorm = 58.9502, GNorm = 1.3265, lr_0 = 4.0385e-04
Loss = 4.0025e-01, PNorm = 58.9553, GNorm = 1.4521, lr_0 = 4.0357e-04
Loss = 3.5233e-01, PNorm = 58.9636, GNorm = 1.2450, lr_0 = 4.0330e-04
Loss = 3.9828e-01, PNorm = 58.9706, GNorm = 1.4242, lr_0 = 4.0302e-04
Loss = 4.1256e-01, PNorm = 58.9820, GNorm = 1.4186, lr_0 = 4.0274e-04
Loss = 3.7647e-01, PNorm = 58.9916, GNorm = 1.0270, lr_0 = 4.0247e-04
Loss = 3.4436e-01, PNorm = 59.0013, GNorm = 2.1245, lr_0 = 4.0219e-04
Loss = 4.3054e-01, PNorm = 59.0047, GNorm = 1.6332, lr_0 = 4.0192e-04
Loss = 3.7791e-01, PNorm = 59.0181, GNorm = 1.0937, lr_0 = 4.0164e-04
Loss = 3.8334e-01, PNorm = 59.0238, GNorm = 1.3236, lr_0 = 4.0137e-04
Loss = 5.2704e-01, PNorm = 59.0302, GNorm = 1.3503, lr_0 = 4.0109e-04
Loss = 3.6979e-01, PNorm = 59.0396, GNorm = 2.0289, lr_0 = 4.0082e-04
Loss = 3.7186e-01, PNorm = 59.0438, GNorm = 1.4620, lr_0 = 4.0054e-04
Loss = 3.6759e-01, PNorm = 59.0463, GNorm = 1.0881, lr_0 = 4.0027e-04
Loss = 4.3853e-01, PNorm = 59.0507, GNorm = 1.3875, lr_0 = 3.9999e-04
Loss = 4.1828e-01, PNorm = 59.0565, GNorm = 1.4181, lr_0 = 3.9972e-04
Loss = 4.3503e-01, PNorm = 59.0662, GNorm = 1.6636, lr_0 = 3.9945e-04
Loss = 4.2534e-01, PNorm = 59.0724, GNorm = 1.0672, lr_0 = 3.9917e-04
Loss = 4.0700e-01, PNorm = 59.0784, GNorm = 2.0667, lr_0 = 3.9890e-04
Loss = 4.0329e-01, PNorm = 59.0823, GNorm = 1.3592, lr_0 = 3.9863e-04
Loss = 3.6259e-01, PNorm = 59.0899, GNorm = 1.1359, lr_0 = 3.9835e-04
Loss = 4.0533e-01, PNorm = 59.0965, GNorm = 2.1871, lr_0 = 3.9808e-04
Loss = 3.8909e-01, PNorm = 59.1008, GNorm = 1.3090, lr_0 = 3.9781e-04
Loss = 3.5562e-01, PNorm = 59.1045, GNorm = 1.3658, lr_0 = 3.9753e-04
Loss = 3.8747e-01, PNorm = 59.1133, GNorm = 1.2701, lr_0 = 3.9726e-04
Loss = 4.6117e-01, PNorm = 59.1206, GNorm = 1.7224, lr_0 = 3.9699e-04
Loss = 4.2913e-01, PNorm = 59.1300, GNorm = 1.3865, lr_0 = 3.9672e-04
Loss = 3.3832e-01, PNorm = 59.1312, GNorm = 1.3942, lr_0 = 3.9645e-04
Loss = 4.1355e-01, PNorm = 59.1393, GNorm = 1.8387, lr_0 = 3.9617e-04
Loss = 3.3632e-01, PNorm = 59.1460, GNorm = 1.0705, lr_0 = 3.9590e-04
Loss = 3.7296e-01, PNorm = 59.1500, GNorm = 1.1195, lr_0 = 3.9563e-04
Loss = 4.3363e-01, PNorm = 59.1564, GNorm = 1.3577, lr_0 = 3.9536e-04
Loss = 3.8010e-01, PNorm = 59.1611, GNorm = 1.9858, lr_0 = 3.9509e-04
Loss = 3.6316e-01, PNorm = 59.1663, GNorm = 1.1667, lr_0 = 3.9482e-04
Loss = 4.0502e-01, PNorm = 59.1759, GNorm = 1.5880, lr_0 = 3.9455e-04
Loss = 3.6529e-01, PNorm = 59.1827, GNorm = 1.7518, lr_0 = 3.9428e-04
Loss = 4.1278e-01, PNorm = 59.1912, GNorm = 1.4527, lr_0 = 3.9401e-04
Loss = 3.7719e-01, PNorm = 59.2019, GNorm = 1.2434, lr_0 = 3.9374e-04
Loss = 3.4225e-01, PNorm = 59.2096, GNorm = 1.6809, lr_0 = 3.9347e-04
Loss = 3.7613e-01, PNorm = 59.2117, GNorm = 1.4248, lr_0 = 3.9320e-04
Loss = 3.6864e-01, PNorm = 59.2178, GNorm = 1.2609, lr_0 = 3.9293e-04
Loss = 3.9638e-01, PNorm = 59.2220, GNorm = 1.8248, lr_0 = 3.9266e-04
Loss = 4.0331e-01, PNorm = 59.2284, GNorm = 1.5426, lr_0 = 3.9239e-04
Loss = 3.9318e-01, PNorm = 59.2335, GNorm = 2.3926, lr_0 = 3.9212e-04
Loss = 3.6136e-01, PNorm = 59.2352, GNorm = 1.3175, lr_0 = 3.9185e-04
Loss = 3.8136e-01, PNorm = 59.2400, GNorm = 1.8113, lr_0 = 3.9159e-04
Loss = 4.3958e-01, PNorm = 59.2442, GNorm = 1.2895, lr_0 = 3.9132e-04
Loss = 4.3320e-01, PNorm = 59.2520, GNorm = 1.5953, lr_0 = 3.9105e-04
Loss = 3.9693e-01, PNorm = 59.2556, GNorm = 1.5267, lr_0 = 3.9078e-04
Loss = 4.0974e-01, PNorm = 59.2614, GNorm = 0.8637, lr_0 = 3.9051e-04
Loss = 3.8499e-01, PNorm = 59.2685, GNorm = 1.4393, lr_0 = 3.9025e-04
Loss = 3.4995e-01, PNorm = 59.2741, GNorm = 1.7108, lr_0 = 3.8998e-04
Loss = 3.5891e-01, PNorm = 59.2794, GNorm = 1.8115, lr_0 = 3.8971e-04
Loss = 4.3576e-01, PNorm = 59.2825, GNorm = 2.2274, lr_0 = 3.8945e-04
Loss = 4.3719e-01, PNorm = 59.2865, GNorm = 1.5621, lr_0 = 3.8918e-04
Loss = 4.0840e-01, PNorm = 59.2924, GNorm = 1.2609, lr_0 = 3.8891e-04
Loss = 3.8858e-01, PNorm = 59.2971, GNorm = 1.1641, lr_0 = 3.8865e-04
Loss = 3.8657e-01, PNorm = 59.3014, GNorm = 2.0813, lr_0 = 3.8838e-04
Loss = 3.7232e-01, PNorm = 59.3022, GNorm = 1.1692, lr_0 = 3.8811e-04
Loss = 4.3809e-01, PNorm = 59.3100, GNorm = 1.5336, lr_0 = 3.8785e-04
Loss = 3.7612e-01, PNorm = 59.3162, GNorm = 0.9572, lr_0 = 3.8758e-04
Loss = 3.5881e-01, PNorm = 59.3232, GNorm = 1.2162, lr_0 = 3.8732e-04
Loss = 3.8061e-01, PNorm = 59.3324, GNorm = 1.3458, lr_0 = 3.8705e-04
Loss = 3.4951e-01, PNorm = 59.3369, GNorm = 1.4989, lr_0 = 3.8679e-04
Loss = 4.2362e-01, PNorm = 59.3431, GNorm = 1.0398, lr_0 = 3.8652e-04
Loss = 4.2507e-01, PNorm = 59.3472, GNorm = 1.2111, lr_0 = 3.8626e-04
Loss = 4.1093e-01, PNorm = 59.3558, GNorm = 1.5595, lr_0 = 3.8599e-04
Loss = 4.0990e-01, PNorm = 59.3625, GNorm = 0.9889, lr_0 = 3.8573e-04
Loss = 3.7204e-01, PNorm = 59.3618, GNorm = 1.4274, lr_0 = 3.8546e-04
Loss = 4.5594e-01, PNorm = 59.3688, GNorm = 1.2636, lr_0 = 3.8520e-04
Loss = 3.7584e-01, PNorm = 59.3786, GNorm = 1.3786, lr_0 = 3.8493e-04
Loss = 3.5494e-01, PNorm = 59.3861, GNorm = 1.0503, lr_0 = 3.8467e-04
Loss = 4.1995e-01, PNorm = 59.3928, GNorm = 1.6481, lr_0 = 3.8441e-04
Loss = 3.8145e-01, PNorm = 59.3982, GNorm = 1.0239, lr_0 = 3.8414e-04
Loss = 3.6712e-01, PNorm = 59.4027, GNorm = 1.5330, lr_0 = 3.8388e-04
Loss = 3.8197e-01, PNorm = 59.4102, GNorm = 1.1046, lr_0 = 3.8362e-04
Loss = 4.3560e-01, PNorm = 59.4213, GNorm = 2.1655, lr_0 = 3.8336e-04
Loss = 4.1661e-01, PNorm = 59.4262, GNorm = 1.7657, lr_0 = 3.8309e-04
Loss = 4.1202e-01, PNorm = 59.4284, GNorm = 1.2325, lr_0 = 3.8283e-04
Loss = 4.3913e-01, PNorm = 59.4361, GNorm = 1.6452, lr_0 = 3.8257e-04
Loss = 3.5745e-01, PNorm = 59.4455, GNorm = 1.2253, lr_0 = 3.8231e-04
Loss = 3.5723e-01, PNorm = 59.4554, GNorm = 1.1662, lr_0 = 3.8204e-04
Loss = 4.0886e-01, PNorm = 59.4626, GNorm = 1.4295, lr_0 = 3.8178e-04
Loss = 3.5551e-01, PNorm = 59.4739, GNorm = 1.2301, lr_0 = 3.8152e-04
Loss = 3.8421e-01, PNorm = 59.4783, GNorm = 1.8045, lr_0 = 3.8126e-04
Loss = 3.7080e-01, PNorm = 59.4826, GNorm = 1.1429, lr_0 = 3.8100e-04
Loss = 3.6895e-01, PNorm = 59.4889, GNorm = 1.3975, lr_0 = 3.8074e-04
Loss = 4.0216e-01, PNorm = 59.4958, GNorm = 1.6516, lr_0 = 3.8048e-04
Loss = 4.3103e-01, PNorm = 59.5062, GNorm = 1.2559, lr_0 = 3.8022e-04
Loss = 3.7636e-01, PNorm = 59.5152, GNorm = 1.2428, lr_0 = 3.7995e-04
Loss = 4.6595e-01, PNorm = 59.5195, GNorm = 1.4355, lr_0 = 3.7969e-04
Loss = 4.1157e-01, PNorm = 59.5268, GNorm = 1.6417, lr_0 = 3.7943e-04
Loss = 3.9801e-01, PNorm = 59.5305, GNorm = 1.0668, lr_0 = 3.7917e-04
Loss = 3.4985e-01, PNorm = 59.5368, GNorm = 0.7122, lr_0 = 3.7891e-04
Loss = 3.3316e-01, PNorm = 59.5441, GNorm = 1.2290, lr_0 = 3.7866e-04
Loss = 4.5480e-01, PNorm = 59.5489, GNorm = 1.4142, lr_0 = 3.7840e-04
Loss = 4.2805e-01, PNorm = 59.5533, GNorm = 1.2490, lr_0 = 3.7814e-04
Loss = 4.0532e-01, PNorm = 59.5570, GNorm = 1.3757, lr_0 = 3.7788e-04
Loss = 3.9806e-01, PNorm = 59.5614, GNorm = 1.3911, lr_0 = 3.7762e-04
Loss = 4.0677e-01, PNorm = 59.5676, GNorm = 1.9062, lr_0 = 3.7736e-04
Loss = 4.3820e-01, PNorm = 59.5718, GNorm = 1.2842, lr_0 = 3.7710e-04
Loss = 4.6228e-01, PNorm = 59.5797, GNorm = 1.4528, lr_0 = 3.7684e-04
Loss = 4.0428e-01, PNorm = 59.5838, GNorm = 1.3792, lr_0 = 3.7659e-04
Loss = 3.7486e-01, PNorm = 59.5875, GNorm = 1.4080, lr_0 = 3.7633e-04
Loss = 3.8257e-01, PNorm = 59.5900, GNorm = 1.4628, lr_0 = 3.7607e-04
Loss = 4.0842e-01, PNorm = 59.5915, GNorm = 1.7872, lr_0 = 3.7581e-04
Loss = 3.8513e-01, PNorm = 59.5963, GNorm = 1.1793, lr_0 = 3.7555e-04
Loss = 3.6256e-01, PNorm = 59.6041, GNorm = 1.1621, lr_0 = 3.7530e-04
Loss = 3.5138e-01, PNorm = 59.6088, GNorm = 1.2779, lr_0 = 3.7504e-04
Loss = 3.8395e-01, PNorm = 59.6139, GNorm = 1.6079, lr_0 = 3.7478e-04
Loss = 3.5883e-01, PNorm = 59.6179, GNorm = 1.6035, lr_0 = 3.7453e-04
Loss = 3.8892e-01, PNorm = 59.6174, GNorm = 1.5109, lr_0 = 3.7427e-04
Loss = 4.3791e-01, PNorm = 59.6259, GNorm = 1.1875, lr_0 = 3.7401e-04
Loss = 4.0065e-01, PNorm = 59.6289, GNorm = 1.8410, lr_0 = 3.7376e-04
Loss = 4.0943e-01, PNorm = 59.6323, GNorm = 1.6627, lr_0 = 3.7350e-04
Loss = 3.8589e-01, PNorm = 59.6395, GNorm = 1.2519, lr_0 = 3.7325e-04
Loss = 3.9027e-01, PNorm = 59.6486, GNorm = 1.2156, lr_0 = 3.7299e-04
Loss = 4.0843e-01, PNorm = 59.6555, GNorm = 1.1603, lr_0 = 3.7273e-04
Validation mae = 0.113630
Epoch 14
Loss = 3.9628e-01, PNorm = 59.6628, GNorm = 1.1483, lr_0 = 3.7248e-04
Loss = 3.6712e-01, PNorm = 59.6651, GNorm = 1.4164, lr_0 = 3.7222e-04
Loss = 3.8742e-01, PNorm = 59.6655, GNorm = 1.6050, lr_0 = 3.7197e-04
Loss = 3.7628e-01, PNorm = 59.6718, GNorm = 1.3948, lr_0 = 3.7171e-04
Loss = 3.8126e-01, PNorm = 59.6790, GNorm = 1.1587, lr_0 = 3.7146e-04
Loss = 4.1372e-01, PNorm = 59.6861, GNorm = 1.1227, lr_0 = 3.7120e-04
Loss = 4.1386e-01, PNorm = 59.6923, GNorm = 2.4546, lr_0 = 3.7095e-04
Loss = 3.5120e-01, PNorm = 59.6980, GNorm = 1.4852, lr_0 = 3.7070e-04
Loss = 3.7451e-01, PNorm = 59.7054, GNorm = 1.4093, lr_0 = 3.7044e-04
Loss = 4.3739e-01, PNorm = 59.7118, GNorm = 2.5355, lr_0 = 3.7019e-04
Loss = 3.2660e-01, PNorm = 59.7254, GNorm = 1.5635, lr_0 = 3.6993e-04
Loss = 4.0826e-01, PNorm = 59.7338, GNorm = 1.1611, lr_0 = 3.6968e-04
Loss = 3.5111e-01, PNorm = 59.7416, GNorm = 1.9713, lr_0 = 3.6943e-04
Loss = 4.3687e-01, PNorm = 59.7473, GNorm = 1.9211, lr_0 = 3.6917e-04
Loss = 3.5266e-01, PNorm = 59.7488, GNorm = 1.1943, lr_0 = 3.6892e-04
Loss = 3.4772e-01, PNorm = 59.7512, GNorm = 1.9244, lr_0 = 3.6867e-04
Loss = 3.9401e-01, PNorm = 59.7595, GNorm = 1.3442, lr_0 = 3.6842e-04
Loss = 4.1652e-01, PNorm = 59.7628, GNorm = 1.7728, lr_0 = 3.6816e-04
Loss = 3.7834e-01, PNorm = 59.7659, GNorm = 1.4960, lr_0 = 3.6791e-04
Loss = 3.4265e-01, PNorm = 59.7703, GNorm = 1.5083, lr_0 = 3.6766e-04
Loss = 3.7386e-01, PNorm = 59.7699, GNorm = 1.2478, lr_0 = 3.6741e-04
Loss = 3.5286e-01, PNorm = 59.7729, GNorm = 1.2334, lr_0 = 3.6716e-04
Loss = 3.8237e-01, PNorm = 59.7778, GNorm = 1.2534, lr_0 = 3.6690e-04
Loss = 3.8887e-01, PNorm = 59.7826, GNorm = 1.1356, lr_0 = 3.6665e-04
Loss = 3.9693e-01, PNorm = 59.7909, GNorm = 1.9889, lr_0 = 3.6640e-04
Loss = 4.0781e-01, PNorm = 59.8006, GNorm = 1.5806, lr_0 = 3.6615e-04
Loss = 3.7123e-01, PNorm = 59.8060, GNorm = 1.2071, lr_0 = 3.6590e-04
Loss = 4.0155e-01, PNorm = 59.8085, GNorm = 1.5773, lr_0 = 3.6565e-04
Loss = 3.8884e-01, PNorm = 59.8170, GNorm = 1.4698, lr_0 = 3.6540e-04
Loss = 4.0188e-01, PNorm = 59.8234, GNorm = 1.5706, lr_0 = 3.6515e-04
Loss = 3.6577e-01, PNorm = 59.8282, GNorm = 1.9665, lr_0 = 3.6490e-04
Loss = 3.9549e-01, PNorm = 59.8299, GNorm = 1.4449, lr_0 = 3.6465e-04
Loss = 3.8903e-01, PNorm = 59.8345, GNorm = 1.2381, lr_0 = 3.6440e-04
Loss = 4.1464e-01, PNorm = 59.8415, GNorm = 1.2045, lr_0 = 3.6415e-04
Loss = 4.2741e-01, PNorm = 59.8485, GNorm = 2.6804, lr_0 = 3.6390e-04
Loss = 4.0452e-01, PNorm = 59.8544, GNorm = 1.4952, lr_0 = 3.6365e-04
Loss = 4.5825e-01, PNorm = 59.8615, GNorm = 2.0201, lr_0 = 3.6340e-04
Loss = 3.5554e-01, PNorm = 59.8691, GNorm = 1.0980, lr_0 = 3.6315e-04
Loss = 3.2507e-01, PNorm = 59.8753, GNorm = 1.0557, lr_0 = 3.6290e-04
Loss = 3.5856e-01, PNorm = 59.8803, GNorm = 1.4689, lr_0 = 3.6266e-04
Loss = 3.3883e-01, PNorm = 59.8874, GNorm = 1.2499, lr_0 = 3.6241e-04
Loss = 4.3816e-01, PNorm = 59.8915, GNorm = 1.2694, lr_0 = 3.6216e-04
Loss = 3.8079e-01, PNorm = 59.9008, GNorm = 1.1078, lr_0 = 3.6191e-04
Loss = 3.8479e-01, PNorm = 59.9051, GNorm = 1.6140, lr_0 = 3.6166e-04
Loss = 3.8049e-01, PNorm = 59.9089, GNorm = 1.0772, lr_0 = 3.6141e-04
Loss = 3.7109e-01, PNorm = 59.9143, GNorm = 1.3384, lr_0 = 3.6117e-04
Loss = 3.4070e-01, PNorm = 59.9165, GNorm = 1.1599, lr_0 = 3.6092e-04
Loss = 3.8877e-01, PNorm = 59.9199, GNorm = 1.4821, lr_0 = 3.6067e-04
Loss = 4.3313e-01, PNorm = 59.9268, GNorm = 1.5032, lr_0 = 3.6043e-04
Loss = 3.5129e-01, PNorm = 59.9281, GNorm = 1.3307, lr_0 = 3.6018e-04
Loss = 3.5909e-01, PNorm = 59.9344, GNorm = 1.7595, lr_0 = 3.5993e-04
Loss = 3.9556e-01, PNorm = 59.9413, GNorm = 1.2596, lr_0 = 3.5969e-04
Loss = 3.8110e-01, PNorm = 59.9447, GNorm = 1.1999, lr_0 = 3.5944e-04
Loss = 3.3073e-01, PNorm = 59.9548, GNorm = 1.2516, lr_0 = 3.5919e-04
Loss = 3.5547e-01, PNorm = 59.9606, GNorm = 1.1174, lr_0 = 3.5895e-04
Loss = 3.6941e-01, PNorm = 59.9621, GNorm = 1.9613, lr_0 = 3.5870e-04
Loss = 3.9639e-01, PNorm = 59.9640, GNorm = 1.1668, lr_0 = 3.5845e-04
Loss = 3.3830e-01, PNorm = 59.9673, GNorm = 1.2519, lr_0 = 3.5821e-04
Loss = 4.1775e-01, PNorm = 59.9727, GNorm = 1.3543, lr_0 = 3.5796e-04
Loss = 3.7559e-01, PNorm = 59.9844, GNorm = 1.5560, lr_0 = 3.5772e-04
Loss = 3.9870e-01, PNorm = 59.9866, GNorm = 1.0258, lr_0 = 3.5747e-04
Loss = 3.6338e-01, PNorm = 59.9940, GNorm = 1.3478, lr_0 = 3.5723e-04
Loss = 4.3482e-01, PNorm = 59.9970, GNorm = 1.2316, lr_0 = 3.5698e-04
Loss = 3.4684e-01, PNorm = 60.0002, GNorm = 1.9316, lr_0 = 3.5674e-04
Loss = 3.8266e-01, PNorm = 60.0045, GNorm = 1.0824, lr_0 = 3.5650e-04
Loss = 3.7127e-01, PNorm = 60.0112, GNorm = 1.3732, lr_0 = 3.5625e-04
Loss = 4.3116e-01, PNorm = 60.0176, GNorm = 1.4550, lr_0 = 3.5601e-04
Loss = 3.8250e-01, PNorm = 60.0248, GNorm = 1.0577, lr_0 = 3.5576e-04
Loss = 4.4569e-01, PNorm = 60.0277, GNorm = 1.2484, lr_0 = 3.5552e-04
Loss = 3.5815e-01, PNorm = 60.0280, GNorm = 1.6612, lr_0 = 3.5528e-04
Loss = 4.4184e-01, PNorm = 60.0299, GNorm = 1.5596, lr_0 = 3.5503e-04
Loss = 3.6609e-01, PNorm = 60.0412, GNorm = 1.2493, lr_0 = 3.5479e-04
Loss = 3.6227e-01, PNorm = 60.0418, GNorm = 0.9493, lr_0 = 3.5455e-04
Loss = 3.4646e-01, PNorm = 60.0440, GNorm = 1.2262, lr_0 = 3.5430e-04
Loss = 4.1154e-01, PNorm = 60.0472, GNorm = 1.1972, lr_0 = 3.5406e-04
Loss = 4.4911e-01, PNorm = 60.0473, GNorm = 1.0898, lr_0 = 3.5382e-04
Loss = 3.9883e-01, PNorm = 60.0466, GNorm = 1.2294, lr_0 = 3.5358e-04
Loss = 4.1425e-01, PNorm = 60.0565, GNorm = 1.4284, lr_0 = 3.5333e-04
Loss = 3.8808e-01, PNorm = 60.0594, GNorm = 1.7693, lr_0 = 3.5309e-04
Loss = 3.8331e-01, PNorm = 60.0702, GNorm = 1.2621, lr_0 = 3.5285e-04
Loss = 4.4004e-01, PNorm = 60.0773, GNorm = 1.2070, lr_0 = 3.5261e-04
Loss = 3.6657e-01, PNorm = 60.0851, GNorm = 1.3265, lr_0 = 3.5237e-04
Loss = 3.5750e-01, PNorm = 60.0900, GNorm = 1.2982, lr_0 = 3.5212e-04
Loss = 3.9015e-01, PNorm = 60.0950, GNorm = 1.3588, lr_0 = 3.5188e-04
Loss = 3.7915e-01, PNorm = 60.1000, GNorm = 1.3646, lr_0 = 3.5164e-04
Loss = 4.2803e-01, PNorm = 60.1047, GNorm = 1.2640, lr_0 = 3.5140e-04
Loss = 3.9871e-01, PNorm = 60.1057, GNorm = 1.4244, lr_0 = 3.5116e-04
Loss = 3.6539e-01, PNorm = 60.1049, GNorm = 1.3756, lr_0 = 3.5092e-04
Loss = 3.8442e-01, PNorm = 60.1136, GNorm = 1.5138, lr_0 = 3.5068e-04
Loss = 3.9764e-01, PNorm = 60.1165, GNorm = 1.8393, lr_0 = 3.5044e-04
Loss = 4.3958e-01, PNorm = 60.1201, GNorm = 1.4700, lr_0 = 3.5020e-04
Loss = 4.0821e-01, PNorm = 60.1236, GNorm = 0.8530, lr_0 = 3.4996e-04
Loss = 3.9157e-01, PNorm = 60.1290, GNorm = 1.3139, lr_0 = 3.4972e-04
Loss = 3.8022e-01, PNorm = 60.1344, GNorm = 1.1665, lr_0 = 3.4948e-04
Loss = 3.7873e-01, PNorm = 60.1375, GNorm = 1.8902, lr_0 = 3.4924e-04
Loss = 3.8705e-01, PNorm = 60.1455, GNorm = 1.2822, lr_0 = 3.4900e-04
Loss = 3.5550e-01, PNorm = 60.1492, GNorm = 1.5325, lr_0 = 3.4876e-04
Loss = 4.0347e-01, PNorm = 60.1560, GNorm = 1.4851, lr_0 = 3.4852e-04
Loss = 4.0353e-01, PNorm = 60.1638, GNorm = 1.3133, lr_0 = 3.4828e-04
Loss = 4.1045e-01, PNorm = 60.1688, GNorm = 2.1298, lr_0 = 3.4805e-04
Loss = 3.7847e-01, PNorm = 60.1688, GNorm = 1.5179, lr_0 = 3.4781e-04
Loss = 4.0987e-01, PNorm = 60.1730, GNorm = 1.4951, lr_0 = 3.4757e-04
Loss = 3.8888e-01, PNorm = 60.1749, GNorm = 1.4601, lr_0 = 3.4733e-04
Loss = 4.4340e-01, PNorm = 60.1830, GNorm = 1.0779, lr_0 = 3.4709e-04
Loss = 4.5387e-01, PNorm = 60.1905, GNorm = 1.0690, lr_0 = 3.4686e-04
Loss = 4.1419e-01, PNorm = 60.1982, GNorm = 1.2406, lr_0 = 3.4662e-04
Loss = 3.9703e-01, PNorm = 60.2032, GNorm = 1.4482, lr_0 = 3.4638e-04
Loss = 3.5982e-01, PNorm = 60.2009, GNorm = 1.1228, lr_0 = 3.4614e-04
Loss = 4.1233e-01, PNorm = 60.2037, GNorm = 1.7814, lr_0 = 3.4591e-04
Loss = 4.0179e-01, PNorm = 60.2061, GNorm = 1.5776, lr_0 = 3.4567e-04
Loss = 4.3970e-01, PNorm = 60.2110, GNorm = 1.6924, lr_0 = 3.4543e-04
Loss = 3.8954e-01, PNorm = 60.2122, GNorm = 1.5207, lr_0 = 3.4520e-04
Loss = 3.9775e-01, PNorm = 60.2171, GNorm = 0.7499, lr_0 = 3.4496e-04
Loss = 4.0756e-01, PNorm = 60.2249, GNorm = 1.5065, lr_0 = 3.4472e-04
Loss = 4.4726e-01, PNorm = 60.2320, GNorm = 1.5558, lr_0 = 3.4449e-04
Loss = 3.3908e-01, PNorm = 60.2367, GNorm = 1.2443, lr_0 = 3.4425e-04
Loss = 4.4203e-01, PNorm = 60.2449, GNorm = 1.4443, lr_0 = 3.4402e-04
Loss = 4.0415e-01, PNorm = 60.2492, GNorm = 1.5479, lr_0 = 3.4378e-04
Loss = 3.9044e-01, PNorm = 60.2533, GNorm = 1.1376, lr_0 = 3.4354e-04
Loss = 3.9185e-01, PNorm = 60.2611, GNorm = 1.1919, lr_0 = 3.4331e-04
Validation mae = 0.112882
Epoch 15
Loss = 3.8743e-01, PNorm = 60.2642, GNorm = 1.1953, lr_0 = 3.4307e-04
Loss = 3.1808e-01, PNorm = 60.2708, GNorm = 1.4871, lr_0 = 3.4284e-04
Loss = 4.1544e-01, PNorm = 60.2751, GNorm = 1.4543, lr_0 = 3.4260e-04
Loss = 3.6007e-01, PNorm = 60.2804, GNorm = 1.1170, lr_0 = 3.4237e-04
Loss = 3.6580e-01, PNorm = 60.2860, GNorm = 1.7936, lr_0 = 3.4213e-04
Loss = 3.7062e-01, PNorm = 60.2897, GNorm = 1.7687, lr_0 = 3.4190e-04
Loss = 3.5026e-01, PNorm = 60.2898, GNorm = 1.8624, lr_0 = 3.4167e-04
Loss = 3.9888e-01, PNorm = 60.2892, GNorm = 1.6996, lr_0 = 3.4143e-04
Loss = 3.9275e-01, PNorm = 60.2954, GNorm = 1.4862, lr_0 = 3.4120e-04
Loss = 4.1674e-01, PNorm = 60.2986, GNorm = 1.2447, lr_0 = 3.4096e-04
Loss = 4.3621e-01, PNorm = 60.3055, GNorm = 1.4867, lr_0 = 3.4073e-04
Loss = 3.8664e-01, PNorm = 60.3136, GNorm = 1.3187, lr_0 = 3.4050e-04
Loss = 4.0155e-01, PNorm = 60.3203, GNorm = 1.4398, lr_0 = 3.4026e-04
Loss = 3.8949e-01, PNorm = 60.3236, GNorm = 1.2059, lr_0 = 3.4003e-04
Loss = 3.3768e-01, PNorm = 60.3287, GNorm = 1.2874, lr_0 = 3.3980e-04
Loss = 3.9572e-01, PNorm = 60.3316, GNorm = 1.6253, lr_0 = 3.3956e-04
Loss = 3.6861e-01, PNorm = 60.3349, GNorm = 1.4050, lr_0 = 3.3933e-04
Loss = 3.6384e-01, PNorm = 60.3427, GNorm = 1.4873, lr_0 = 3.3910e-04
Loss = 4.3911e-01, PNorm = 60.3471, GNorm = 1.0676, lr_0 = 3.3887e-04
Loss = 3.8661e-01, PNorm = 60.3530, GNorm = 1.5201, lr_0 = 3.3864e-04
Loss = 3.7940e-01, PNorm = 60.3538, GNorm = 0.8465, lr_0 = 3.3840e-04
Loss = 3.2629e-01, PNorm = 60.3577, GNorm = 1.4457, lr_0 = 3.3817e-04
Loss = 3.8970e-01, PNorm = 60.3609, GNorm = 1.1645, lr_0 = 3.3794e-04
Loss = 3.9550e-01, PNorm = 60.3693, GNorm = 0.8105, lr_0 = 3.3771e-04
Loss = 4.2407e-01, PNorm = 60.3706, GNorm = 1.3376, lr_0 = 3.3748e-04
Loss = 3.1841e-01, PNorm = 60.3746, GNorm = 0.7218, lr_0 = 3.3725e-04
Loss = 3.4506e-01, PNorm = 60.3812, GNorm = 1.1135, lr_0 = 3.3701e-04
Loss = 3.6788e-01, PNorm = 60.3856, GNorm = 1.1889, lr_0 = 3.3678e-04
Loss = 3.6951e-01, PNorm = 60.3916, GNorm = 2.7281, lr_0 = 3.3655e-04
Loss = 4.3504e-01, PNorm = 60.3918, GNorm = 1.1841, lr_0 = 3.3632e-04
Loss = 3.8263e-01, PNorm = 60.4007, GNorm = 1.6707, lr_0 = 3.3609e-04
Loss = 4.0022e-01, PNorm = 60.4094, GNorm = 1.1738, lr_0 = 3.3586e-04
Loss = 3.7130e-01, PNorm = 60.4152, GNorm = 1.5540, lr_0 = 3.3563e-04
Loss = 3.9965e-01, PNorm = 60.4207, GNorm = 1.2813, lr_0 = 3.3540e-04
Loss = 3.2068e-01, PNorm = 60.4293, GNorm = 1.4206, lr_0 = 3.3517e-04
Loss = 3.6200e-01, PNorm = 60.4345, GNorm = 1.0539, lr_0 = 3.3494e-04
Loss = 4.1340e-01, PNorm = 60.4419, GNorm = 1.6373, lr_0 = 3.3471e-04
Loss = 4.1629e-01, PNorm = 60.4511, GNorm = 1.5578, lr_0 = 3.3448e-04
Loss = 4.2541e-01, PNorm = 60.4572, GNorm = 1.6855, lr_0 = 3.3425e-04
Loss = 3.8118e-01, PNorm = 60.4600, GNorm = 1.2472, lr_0 = 3.3403e-04
Loss = 3.8390e-01, PNorm = 60.4629, GNorm = 1.4391, lr_0 = 3.3380e-04
Loss = 3.7442e-01, PNorm = 60.4702, GNorm = 1.5492, lr_0 = 3.3357e-04
Loss = 3.1830e-01, PNorm = 60.4713, GNorm = 1.5748, lr_0 = 3.3334e-04
Loss = 3.6309e-01, PNorm = 60.4770, GNorm = 1.5763, lr_0 = 3.3311e-04
Loss = 3.6586e-01, PNorm = 60.4820, GNorm = 1.3822, lr_0 = 3.3288e-04
Loss = 3.3403e-01, PNorm = 60.4888, GNorm = 1.3041, lr_0 = 3.3265e-04
Loss = 3.6406e-01, PNorm = 60.4864, GNorm = 1.3657, lr_0 = 3.3243e-04
Loss = 3.3621e-01, PNorm = 60.4941, GNorm = 1.0839, lr_0 = 3.3220e-04
Loss = 3.3200e-01, PNorm = 60.5000, GNorm = 1.2996, lr_0 = 3.3197e-04
Loss = 3.8202e-01, PNorm = 60.5087, GNorm = 1.2630, lr_0 = 3.3174e-04
Loss = 4.0149e-01, PNorm = 60.5130, GNorm = 1.3866, lr_0 = 3.3152e-04
Loss = 3.8477e-01, PNorm = 60.5167, GNorm = 1.9056, lr_0 = 3.3129e-04
Loss = 3.6321e-01, PNorm = 60.5229, GNorm = 1.6583, lr_0 = 3.3106e-04
Loss = 3.5072e-01, PNorm = 60.5243, GNorm = 1.3016, lr_0 = 3.3084e-04
Loss = 4.0996e-01, PNorm = 60.5283, GNorm = 1.3691, lr_0 = 3.3061e-04
Loss = 3.7634e-01, PNorm = 60.5298, GNorm = 1.8965, lr_0 = 3.3038e-04
Loss = 4.0457e-01, PNorm = 60.5329, GNorm = 1.0854, lr_0 = 3.3016e-04
Loss = 3.9298e-01, PNorm = 60.5373, GNorm = 1.3366, lr_0 = 3.2993e-04
Loss = 4.0268e-01, PNorm = 60.5406, GNorm = 1.6388, lr_0 = 3.2970e-04
Loss = 3.7444e-01, PNorm = 60.5484, GNorm = 1.1767, lr_0 = 3.2948e-04
Loss = 4.3137e-01, PNorm = 60.5535, GNorm = 1.4672, lr_0 = 3.2925e-04
Loss = 3.5890e-01, PNorm = 60.5569, GNorm = 1.2615, lr_0 = 3.2903e-04
Loss = 3.9861e-01, PNorm = 60.5560, GNorm = 1.1717, lr_0 = 3.2880e-04
Loss = 4.3523e-01, PNorm = 60.5609, GNorm = 1.0579, lr_0 = 3.2858e-04
Loss = 3.7839e-01, PNorm = 60.5655, GNorm = 2.0268, lr_0 = 3.2835e-04
Loss = 3.7410e-01, PNorm = 60.5705, GNorm = 1.0876, lr_0 = 3.2813e-04
Loss = 3.4385e-01, PNorm = 60.5746, GNorm = 1.2580, lr_0 = 3.2790e-04
Loss = 4.0327e-01, PNorm = 60.5748, GNorm = 1.3462, lr_0 = 3.2768e-04
Loss = 3.6269e-01, PNorm = 60.5771, GNorm = 1.3046, lr_0 = 3.2745e-04
Loss = 4.0062e-01, PNorm = 60.5776, GNorm = 1.2227, lr_0 = 3.2723e-04
Loss = 3.7705e-01, PNorm = 60.5829, GNorm = 1.2021, lr_0 = 3.2700e-04
Loss = 3.8236e-01, PNorm = 60.5876, GNorm = 1.7337, lr_0 = 3.2678e-04
Loss = 3.5124e-01, PNorm = 60.5918, GNorm = 1.3856, lr_0 = 3.2656e-04
Loss = 3.5780e-01, PNorm = 60.5960, GNorm = 1.0222, lr_0 = 3.2633e-04
Loss = 4.2441e-01, PNorm = 60.5977, GNorm = 1.2085, lr_0 = 3.2611e-04
Loss = 4.0379e-01, PNorm = 60.6051, GNorm = 1.1674, lr_0 = 3.2589e-04
Loss = 3.7331e-01, PNorm = 60.6110, GNorm = 1.8101, lr_0 = 3.2566e-04
Loss = 4.2349e-01, PNorm = 60.6168, GNorm = 1.1767, lr_0 = 3.2544e-04
Loss = 3.4425e-01, PNorm = 60.6258, GNorm = 1.9446, lr_0 = 3.2522e-04
Loss = 3.6920e-01, PNorm = 60.6339, GNorm = 1.6883, lr_0 = 3.2499e-04
Loss = 3.1463e-01, PNorm = 60.6419, GNorm = 1.3092, lr_0 = 3.2477e-04
Loss = 3.3550e-01, PNorm = 60.6428, GNorm = 1.2722, lr_0 = 3.2455e-04
Loss = 3.7375e-01, PNorm = 60.6458, GNorm = 1.1287, lr_0 = 3.2433e-04
Loss = 3.5511e-01, PNorm = 60.6516, GNorm = 1.3966, lr_0 = 3.2410e-04
Loss = 3.4092e-01, PNorm = 60.6552, GNorm = 1.8170, lr_0 = 3.2388e-04
Loss = 4.3125e-01, PNorm = 60.6575, GNorm = 2.5256, lr_0 = 3.2366e-04
Loss = 4.2125e-01, PNorm = 60.6629, GNorm = 1.7447, lr_0 = 3.2344e-04
Loss = 4.1681e-01, PNorm = 60.6634, GNorm = 1.7141, lr_0 = 3.2322e-04
Loss = 3.7261e-01, PNorm = 60.6699, GNorm = 1.9705, lr_0 = 3.2300e-04
Loss = 3.4989e-01, PNorm = 60.6741, GNorm = 1.6102, lr_0 = 3.2277e-04
Loss = 4.1881e-01, PNorm = 60.6841, GNorm = 1.5990, lr_0 = 3.2255e-04
Loss = 4.0316e-01, PNorm = 60.6914, GNorm = 1.4894, lr_0 = 3.2233e-04
Loss = 3.8458e-01, PNorm = 60.6987, GNorm = 2.0623, lr_0 = 3.2211e-04
Loss = 4.0826e-01, PNorm = 60.7024, GNorm = 1.5418, lr_0 = 3.2189e-04
Loss = 3.6969e-01, PNorm = 60.7078, GNorm = 1.2483, lr_0 = 3.2167e-04
Loss = 3.9357e-01, PNorm = 60.7140, GNorm = 1.5305, lr_0 = 3.2145e-04
Loss = 3.4539e-01, PNorm = 60.7164, GNorm = 0.9067, lr_0 = 3.2123e-04
Loss = 4.0243e-01, PNorm = 60.7232, GNorm = 1.2651, lr_0 = 3.2101e-04
Loss = 3.5841e-01, PNorm = 60.7262, GNorm = 2.2384, lr_0 = 3.2079e-04
Loss = 3.9581e-01, PNorm = 60.7303, GNorm = 2.0073, lr_0 = 3.2057e-04
Loss = 3.9663e-01, PNorm = 60.7341, GNorm = 1.1687, lr_0 = 3.2035e-04
Loss = 4.0176e-01, PNorm = 60.7367, GNorm = 1.5224, lr_0 = 3.2013e-04
Loss = 3.9961e-01, PNorm = 60.7428, GNorm = 1.8290, lr_0 = 3.1991e-04
Loss = 3.7300e-01, PNorm = 60.7473, GNorm = 1.2363, lr_0 = 3.1969e-04
Loss = 3.9837e-01, PNorm = 60.7533, GNorm = 1.5954, lr_0 = 3.1947e-04
Loss = 4.7807e-01, PNorm = 60.7575, GNorm = 1.7646, lr_0 = 3.1925e-04
Loss = 3.5203e-01, PNorm = 60.7684, GNorm = 1.9884, lr_0 = 3.1904e-04
Loss = 4.1721e-01, PNorm = 60.7740, GNorm = 2.5985, lr_0 = 3.1882e-04
Loss = 4.0425e-01, PNorm = 60.7794, GNorm = 1.9378, lr_0 = 3.1860e-04
Loss = 3.6962e-01, PNorm = 60.7850, GNorm = 1.1487, lr_0 = 3.1838e-04
Loss = 4.2534e-01, PNorm = 60.7886, GNorm = 1.2038, lr_0 = 3.1816e-04
Loss = 4.2328e-01, PNorm = 60.7894, GNorm = 1.5350, lr_0 = 3.1794e-04
Loss = 3.3976e-01, PNorm = 60.7935, GNorm = 1.1648, lr_0 = 3.1773e-04
Loss = 3.8704e-01, PNorm = 60.7952, GNorm = 1.3953, lr_0 = 3.1751e-04
Loss = 3.3415e-01, PNorm = 60.7995, GNorm = 1.5246, lr_0 = 3.1729e-04
Loss = 4.0697e-01, PNorm = 60.8060, GNorm = 1.5595, lr_0 = 3.1707e-04
Loss = 3.8401e-01, PNorm = 60.8105, GNorm = 1.3534, lr_0 = 3.1686e-04
Loss = 4.6341e-01, PNorm = 60.8163, GNorm = 2.0537, lr_0 = 3.1664e-04
Loss = 4.1384e-01, PNorm = 60.8184, GNorm = 1.2690, lr_0 = 3.1642e-04
Loss = 4.2623e-01, PNorm = 60.8234, GNorm = 1.8676, lr_0 = 3.1621e-04
Validation mae = 0.113330
Epoch 16
Loss = 3.7706e-01, PNorm = 60.8283, GNorm = 1.5587, lr_0 = 3.1599e-04
Loss = 3.8820e-01, PNorm = 60.8275, GNorm = 1.6156, lr_0 = 3.1577e-04
Loss = 3.8474e-01, PNorm = 60.8332, GNorm = 1.4126, lr_0 = 3.1556e-04
Loss = 3.6124e-01, PNorm = 60.8387, GNorm = 1.0556, lr_0 = 3.1534e-04
Loss = 4.0521e-01, PNorm = 60.8355, GNorm = 1.0366, lr_0 = 3.1512e-04
Loss = 3.8626e-01, PNorm = 60.8369, GNorm = 1.6557, lr_0 = 3.1491e-04
Loss = 3.4883e-01, PNorm = 60.8429, GNorm = 1.5027, lr_0 = 3.1469e-04
Loss = 3.7469e-01, PNorm = 60.8477, GNorm = 1.9715, lr_0 = 3.1448e-04
Loss = 3.7786e-01, PNorm = 60.8523, GNorm = 1.8255, lr_0 = 3.1426e-04
Loss = 3.4195e-01, PNorm = 60.8577, GNorm = 1.1327, lr_0 = 3.1405e-04
Loss = 3.8895e-01, PNorm = 60.8637, GNorm = 1.0536, lr_0 = 3.1383e-04
Loss = 4.4088e-01, PNorm = 60.8693, GNorm = 1.5931, lr_0 = 3.1362e-04
Loss = 3.2675e-01, PNorm = 60.8733, GNorm = 1.1647, lr_0 = 3.1340e-04
Loss = 2.8178e-01, PNorm = 60.8768, GNorm = 1.1274, lr_0 = 3.1319e-04
Loss = 3.8927e-01, PNorm = 60.8795, GNorm = 1.2380, lr_0 = 3.1297e-04
Loss = 3.5031e-01, PNorm = 60.8813, GNorm = 1.2453, lr_0 = 3.1276e-04
Loss = 4.3910e-01, PNorm = 60.8869, GNorm = 1.5861, lr_0 = 3.1254e-04
Loss = 3.9188e-01, PNorm = 60.8896, GNorm = 1.4347, lr_0 = 3.1233e-04
Loss = 3.3583e-01, PNorm = 60.8913, GNorm = 1.7756, lr_0 = 3.1212e-04
Loss = 3.4339e-01, PNorm = 60.8923, GNorm = 1.2225, lr_0 = 3.1190e-04
Loss = 3.8160e-01, PNorm = 60.8998, GNorm = 1.3317, lr_0 = 3.1169e-04
Loss = 4.2741e-01, PNorm = 60.9051, GNorm = 2.5942, lr_0 = 3.1147e-04
Loss = 3.5830e-01, PNorm = 60.9153, GNorm = 1.5164, lr_0 = 3.1126e-04
Loss = 3.6981e-01, PNorm = 60.9157, GNorm = 1.6035, lr_0 = 3.1105e-04
Loss = 3.5037e-01, PNorm = 60.9190, GNorm = 1.2535, lr_0 = 3.1083e-04
Loss = 3.5990e-01, PNorm = 60.9221, GNorm = 1.5704, lr_0 = 3.1062e-04
Loss = 3.7236e-01, PNorm = 60.9225, GNorm = 1.3950, lr_0 = 3.1041e-04
Loss = 4.1845e-01, PNorm = 60.9253, GNorm = 1.0762, lr_0 = 3.1020e-04
Loss = 3.1958e-01, PNorm = 60.9303, GNorm = 1.4562, lr_0 = 3.0998e-04
Loss = 3.5576e-01, PNorm = 60.9337, GNorm = 1.5133, lr_0 = 3.0977e-04
Loss = 4.0335e-01, PNorm = 60.9380, GNorm = 1.5595, lr_0 = 3.0956e-04
Loss = 3.6739e-01, PNorm = 60.9396, GNorm = 1.5923, lr_0 = 3.0935e-04
Loss = 4.0577e-01, PNorm = 60.9433, GNorm = 1.7074, lr_0 = 3.0914e-04
Loss = 3.5200e-01, PNorm = 60.9449, GNorm = 1.7364, lr_0 = 3.0892e-04
Loss = 3.8181e-01, PNorm = 60.9529, GNorm = 0.9772, lr_0 = 3.0871e-04
Loss = 3.1949e-01, PNorm = 60.9614, GNorm = 1.6870, lr_0 = 3.0850e-04
Loss = 3.5710e-01, PNorm = 60.9624, GNorm = 1.0292, lr_0 = 3.0829e-04
Loss = 3.5838e-01, PNorm = 60.9671, GNorm = 1.4623, lr_0 = 3.0808e-04
Loss = 4.0440e-01, PNorm = 60.9709, GNorm = 1.4489, lr_0 = 3.0787e-04
Loss = 3.9086e-01, PNorm = 60.9758, GNorm = 1.8478, lr_0 = 3.0766e-04
Loss = 3.5610e-01, PNorm = 60.9786, GNorm = 1.2357, lr_0 = 3.0745e-04
Loss = 3.6942e-01, PNorm = 60.9831, GNorm = 1.7025, lr_0 = 3.0723e-04
Loss = 3.6271e-01, PNorm = 60.9881, GNorm = 1.7064, lr_0 = 3.0702e-04
Loss = 4.0527e-01, PNorm = 60.9910, GNorm = 1.1722, lr_0 = 3.0681e-04
Loss = 3.8092e-01, PNorm = 60.9949, GNorm = 1.5931, lr_0 = 3.0660e-04
Loss = 4.0560e-01, PNorm = 60.9987, GNorm = 1.1302, lr_0 = 3.0639e-04
Loss = 3.8705e-01, PNorm = 61.0048, GNorm = 1.6113, lr_0 = 3.0618e-04
Loss = 4.1304e-01, PNorm = 61.0108, GNorm = 1.2104, lr_0 = 3.0597e-04
Loss = 3.9413e-01, PNorm = 61.0175, GNorm = 1.3869, lr_0 = 3.0576e-04
Loss = 3.7027e-01, PNorm = 61.0271, GNorm = 1.5279, lr_0 = 3.0555e-04
Loss = 3.7595e-01, PNorm = 61.0306, GNorm = 0.8947, lr_0 = 3.0535e-04
Loss = 3.7861e-01, PNorm = 61.0370, GNorm = 1.2354, lr_0 = 3.0514e-04
Loss = 3.6599e-01, PNorm = 61.0421, GNorm = 1.6893, lr_0 = 3.0493e-04
Loss = 3.7615e-01, PNorm = 61.0459, GNorm = 1.4086, lr_0 = 3.0472e-04
Loss = 3.7139e-01, PNorm = 61.0486, GNorm = 1.7098, lr_0 = 3.0451e-04
Loss = 3.7376e-01, PNorm = 61.0526, GNorm = 1.4288, lr_0 = 3.0430e-04
Loss = 3.5893e-01, PNorm = 61.0542, GNorm = 1.2544, lr_0 = 3.0409e-04
Loss = 3.9140e-01, PNorm = 61.0603, GNorm = 1.5179, lr_0 = 3.0388e-04
Loss = 3.7072e-01, PNorm = 61.0666, GNorm = 1.3677, lr_0 = 3.0368e-04
Loss = 3.7691e-01, PNorm = 61.0716, GNorm = 2.4020, lr_0 = 3.0347e-04
Loss = 3.6093e-01, PNorm = 61.0774, GNorm = 1.2115, lr_0 = 3.0326e-04
Loss = 3.9080e-01, PNorm = 61.0814, GNorm = 1.3335, lr_0 = 3.0305e-04
Loss = 3.8465e-01, PNorm = 61.0864, GNorm = 1.7647, lr_0 = 3.0284e-04
Loss = 3.7914e-01, PNorm = 61.0887, GNorm = 1.0503, lr_0 = 3.0264e-04
Loss = 3.7445e-01, PNorm = 61.0953, GNorm = 1.1409, lr_0 = 3.0243e-04
Loss = 3.6276e-01, PNorm = 61.1012, GNorm = 1.4724, lr_0 = 3.0222e-04
Loss = 3.8262e-01, PNorm = 61.1024, GNorm = 1.4246, lr_0 = 3.0202e-04
Loss = 3.8195e-01, PNorm = 61.1064, GNorm = 1.2889, lr_0 = 3.0181e-04
Loss = 3.5235e-01, PNorm = 61.1091, GNorm = 1.2326, lr_0 = 3.0160e-04
Loss = 3.8383e-01, PNorm = 61.1102, GNorm = 2.2008, lr_0 = 3.0140e-04
Loss = 4.4942e-01, PNorm = 61.1154, GNorm = 1.4602, lr_0 = 3.0119e-04
Loss = 4.3507e-01, PNorm = 61.1193, GNorm = 1.4299, lr_0 = 3.0098e-04
Loss = 3.5505e-01, PNorm = 61.1212, GNorm = 1.3173, lr_0 = 3.0078e-04
Loss = 3.2916e-01, PNorm = 61.1237, GNorm = 1.0170, lr_0 = 3.0057e-04
Loss = 3.7412e-01, PNorm = 61.1246, GNorm = 1.4158, lr_0 = 3.0036e-04
Loss = 3.7964e-01, PNorm = 61.1261, GNorm = 1.5926, lr_0 = 3.0016e-04
Loss = 4.2633e-01, PNorm = 61.1285, GNorm = 1.5477, lr_0 = 2.9995e-04
Loss = 3.6486e-01, PNorm = 61.1290, GNorm = 1.4159, lr_0 = 2.9975e-04
Loss = 4.1516e-01, PNorm = 61.1316, GNorm = 1.1076, lr_0 = 2.9954e-04
Loss = 3.4848e-01, PNorm = 61.1353, GNorm = 1.0964, lr_0 = 2.9934e-04
Loss = 4.2414e-01, PNorm = 61.1378, GNorm = 1.3410, lr_0 = 2.9913e-04
Loss = 3.8214e-01, PNorm = 61.1410, GNorm = 1.2745, lr_0 = 2.9893e-04
Loss = 3.7588e-01, PNorm = 61.1469, GNorm = 1.2454, lr_0 = 2.9872e-04
Loss = 3.3354e-01, PNorm = 61.1494, GNorm = 1.6947, lr_0 = 2.9852e-04
Loss = 3.9343e-01, PNorm = 61.1536, GNorm = 1.9421, lr_0 = 2.9831e-04
Loss = 3.6713e-01, PNorm = 61.1581, GNorm = 1.5262, lr_0 = 2.9811e-04
Loss = 4.0494e-01, PNorm = 61.1614, GNorm = 1.1276, lr_0 = 2.9790e-04
Loss = 3.5722e-01, PNorm = 61.1678, GNorm = 2.3433, lr_0 = 2.9770e-04
Loss = 3.4275e-01, PNorm = 61.1719, GNorm = 1.2467, lr_0 = 2.9750e-04
Loss = 3.3273e-01, PNorm = 61.1771, GNorm = 1.3439, lr_0 = 2.9729e-04
Loss = 3.8317e-01, PNorm = 61.1814, GNorm = 1.3172, lr_0 = 2.9709e-04
Loss = 3.8645e-01, PNorm = 61.1864, GNorm = 1.4389, lr_0 = 2.9689e-04
Loss = 3.6443e-01, PNorm = 61.1963, GNorm = 1.4856, lr_0 = 2.9668e-04
Loss = 3.9102e-01, PNorm = 61.1965, GNorm = 1.2587, lr_0 = 2.9648e-04
Loss = 4.1838e-01, PNorm = 61.1993, GNorm = 1.3889, lr_0 = 2.9628e-04
Loss = 4.2305e-01, PNorm = 61.2012, GNorm = 1.2339, lr_0 = 2.9607e-04
Loss = 3.8305e-01, PNorm = 61.2074, GNorm = 1.2152, lr_0 = 2.9587e-04
Loss = 4.0472e-01, PNorm = 61.2119, GNorm = 1.0471, lr_0 = 2.9567e-04
Loss = 4.1561e-01, PNorm = 61.2149, GNorm = 2.5074, lr_0 = 2.9546e-04
Loss = 4.0159e-01, PNorm = 61.2191, GNorm = 1.6498, lr_0 = 2.9526e-04
Loss = 3.9735e-01, PNorm = 61.2276, GNorm = 1.4207, lr_0 = 2.9506e-04
Loss = 3.9369e-01, PNorm = 61.2300, GNorm = 1.2971, lr_0 = 2.9486e-04
Loss = 3.3252e-01, PNorm = 61.2326, GNorm = 1.6769, lr_0 = 2.9466e-04
Loss = 4.0430e-01, PNorm = 61.2334, GNorm = 1.2818, lr_0 = 2.9445e-04
Loss = 4.0439e-01, PNorm = 61.2386, GNorm = 1.3598, lr_0 = 2.9425e-04
Loss = 3.7731e-01, PNorm = 61.2415, GNorm = 1.4576, lr_0 = 2.9405e-04
Loss = 4.2809e-01, PNorm = 61.2452, GNorm = 1.1370, lr_0 = 2.9385e-04
Loss = 4.1031e-01, PNorm = 61.2518, GNorm = 1.1679, lr_0 = 2.9365e-04
Loss = 3.4769e-01, PNorm = 61.2509, GNorm = 1.5190, lr_0 = 2.9345e-04
Loss = 4.0335e-01, PNorm = 61.2564, GNorm = 2.2544, lr_0 = 2.9325e-04
Loss = 3.4126e-01, PNorm = 61.2594, GNorm = 1.4464, lr_0 = 2.9305e-04
Loss = 3.5961e-01, PNorm = 61.2650, GNorm = 1.2379, lr_0 = 2.9284e-04
Loss = 3.8949e-01, PNorm = 61.2640, GNorm = 2.1260, lr_0 = 2.9264e-04
Loss = 4.0017e-01, PNorm = 61.2651, GNorm = 1.1155, lr_0 = 2.9244e-04
Loss = 3.3635e-01, PNorm = 61.2689, GNorm = 1.7392, lr_0 = 2.9224e-04
Loss = 3.8758e-01, PNorm = 61.2742, GNorm = 1.5169, lr_0 = 2.9204e-04
Loss = 3.8465e-01, PNorm = 61.2782, GNorm = 1.8718, lr_0 = 2.9184e-04
Loss = 4.3076e-01, PNorm = 61.2817, GNorm = 1.3810, lr_0 = 2.9164e-04
Loss = 3.5806e-01, PNorm = 61.2875, GNorm = 1.3740, lr_0 = 2.9144e-04
Loss = 4.1023e-01, PNorm = 61.2883, GNorm = 1.6925, lr_0 = 2.9124e-04
Validation mae = 0.113486
Epoch 17
Loss = 4.2909e-01, PNorm = 61.2931, GNorm = 1.6551, lr_0 = 2.9104e-04
Loss = 3.3742e-01, PNorm = 61.2954, GNorm = 1.8830, lr_0 = 2.9084e-04
Loss = 3.4965e-01, PNorm = 61.2988, GNorm = 1.2326, lr_0 = 2.9065e-04
Loss = 3.5735e-01, PNorm = 61.3010, GNorm = 1.4708, lr_0 = 2.9045e-04
Loss = 3.6834e-01, PNorm = 61.3038, GNorm = 1.6303, lr_0 = 2.9025e-04
Loss = 3.4106e-01, PNorm = 61.3091, GNorm = 1.6167, lr_0 = 2.9005e-04
Loss = 3.5875e-01, PNorm = 61.3130, GNorm = 1.0570, lr_0 = 2.8985e-04
Loss = 3.5393e-01, PNorm = 61.3179, GNorm = 1.6585, lr_0 = 2.8965e-04
Loss = 4.2038e-01, PNorm = 61.3226, GNorm = 1.8646, lr_0 = 2.8945e-04
Loss = 3.3927e-01, PNorm = 61.3279, GNorm = 1.1975, lr_0 = 2.8925e-04
Loss = 3.6189e-01, PNorm = 61.3307, GNorm = 1.1845, lr_0 = 2.8906e-04
Loss = 3.6117e-01, PNorm = 61.3342, GNorm = 1.4061, lr_0 = 2.8886e-04
Loss = 3.9280e-01, PNorm = 61.3375, GNorm = 1.1632, lr_0 = 2.8866e-04
Loss = 3.5249e-01, PNorm = 61.3454, GNorm = 0.9276, lr_0 = 2.8846e-04
Loss = 3.5324e-01, PNorm = 61.3478, GNorm = 1.2981, lr_0 = 2.8826e-04
Loss = 3.9308e-01, PNorm = 61.3524, GNorm = 1.6777, lr_0 = 2.8807e-04
Loss = 3.7888e-01, PNorm = 61.3532, GNorm = 1.3649, lr_0 = 2.8787e-04
Loss = 3.5308e-01, PNorm = 61.3535, GNorm = 1.2798, lr_0 = 2.8767e-04
Loss = 3.6765e-01, PNorm = 61.3552, GNorm = 1.3208, lr_0 = 2.8748e-04
Loss = 3.3943e-01, PNorm = 61.3566, GNorm = 1.4403, lr_0 = 2.8728e-04
Loss = 3.5210e-01, PNorm = 61.3594, GNorm = 1.3017, lr_0 = 2.8708e-04
Loss = 3.4804e-01, PNorm = 61.3603, GNorm = 2.5753, lr_0 = 2.8689e-04
Loss = 3.4449e-01, PNorm = 61.3630, GNorm = 1.3110, lr_0 = 2.8669e-04
Loss = 3.4819e-01, PNorm = 61.3656, GNorm = 1.4924, lr_0 = 2.8649e-04
Loss = 3.4658e-01, PNorm = 61.3673, GNorm = 1.2046, lr_0 = 2.8630e-04
Loss = 4.0445e-01, PNorm = 61.3716, GNorm = 1.9022, lr_0 = 2.8610e-04
Loss = 3.3272e-01, PNorm = 61.3755, GNorm = 1.4669, lr_0 = 2.8590e-04
Loss = 3.9983e-01, PNorm = 61.3838, GNorm = 1.5014, lr_0 = 2.8571e-04
Loss = 3.4546e-01, PNorm = 61.3912, GNorm = 0.9364, lr_0 = 2.8551e-04
Loss = 3.6030e-01, PNorm = 61.3952, GNorm = 1.5470, lr_0 = 2.8532e-04
Loss = 3.7099e-01, PNorm = 61.3999, GNorm = 1.1461, lr_0 = 2.8512e-04
Loss = 4.3258e-01, PNorm = 61.4040, GNorm = 1.2235, lr_0 = 2.8493e-04
Loss = 3.7374e-01, PNorm = 61.4092, GNorm = 1.1708, lr_0 = 2.8473e-04
Loss = 3.7192e-01, PNorm = 61.4097, GNorm = 1.2341, lr_0 = 2.8454e-04
Loss = 3.6688e-01, PNorm = 61.4116, GNorm = 1.1038, lr_0 = 2.8434e-04
Loss = 3.8288e-01, PNorm = 61.4149, GNorm = 1.5925, lr_0 = 2.8415e-04
Loss = 3.6241e-01, PNorm = 61.4191, GNorm = 1.1110, lr_0 = 2.8395e-04
Loss = 3.5822e-01, PNorm = 61.4210, GNorm = 1.2355, lr_0 = 2.8376e-04
Loss = 3.7466e-01, PNorm = 61.4232, GNorm = 1.9766, lr_0 = 2.8356e-04
Loss = 3.7127e-01, PNorm = 61.4244, GNorm = 1.5870, lr_0 = 2.8337e-04
Loss = 4.1641e-01, PNorm = 61.4289, GNorm = 1.6203, lr_0 = 2.8317e-04
Loss = 3.6343e-01, PNorm = 61.4323, GNorm = 1.1067, lr_0 = 2.8298e-04
Loss = 3.3457e-01, PNorm = 61.4389, GNorm = 1.4667, lr_0 = 2.8279e-04
Loss = 3.8188e-01, PNorm = 61.4449, GNorm = 1.1997, lr_0 = 2.8259e-04
Loss = 3.6606e-01, PNorm = 61.4469, GNorm = 1.5163, lr_0 = 2.8240e-04
Loss = 3.6193e-01, PNorm = 61.4529, GNorm = 1.4362, lr_0 = 2.8221e-04
Loss = 3.2768e-01, PNorm = 61.4571, GNorm = 1.6640, lr_0 = 2.8201e-04
Loss = 3.5825e-01, PNorm = 61.4596, GNorm = 1.6933, lr_0 = 2.8182e-04
Loss = 3.7269e-01, PNorm = 61.4639, GNorm = 1.2135, lr_0 = 2.8163e-04
Loss = 4.0422e-01, PNorm = 61.4657, GNorm = 0.9844, lr_0 = 2.8143e-04
Loss = 3.6871e-01, PNorm = 61.4716, GNorm = 1.3524, lr_0 = 2.8124e-04
Loss = 3.6014e-01, PNorm = 61.4744, GNorm = 1.5692, lr_0 = 2.8105e-04
Loss = 3.6822e-01, PNorm = 61.4778, GNorm = 1.2349, lr_0 = 2.8085e-04
Loss = 3.7941e-01, PNorm = 61.4834, GNorm = 1.3125, lr_0 = 2.8066e-04
Loss = 3.8077e-01, PNorm = 61.4845, GNorm = 1.1679, lr_0 = 2.8047e-04
Loss = 3.7615e-01, PNorm = 61.4843, GNorm = 1.3734, lr_0 = 2.8028e-04
Loss = 4.0040e-01, PNorm = 61.4863, GNorm = 1.6759, lr_0 = 2.8009e-04
Loss = 3.6338e-01, PNorm = 61.4887, GNorm = 1.7274, lr_0 = 2.7989e-04
Loss = 3.3822e-01, PNorm = 61.4916, GNorm = 1.3412, lr_0 = 2.7970e-04
Loss = 3.6235e-01, PNorm = 61.4971, GNorm = 1.6939, lr_0 = 2.7951e-04
Loss = 3.7573e-01, PNorm = 61.5007, GNorm = 1.1424, lr_0 = 2.7932e-04
Loss = 3.4231e-01, PNorm = 61.5059, GNorm = 1.4020, lr_0 = 2.7913e-04
Loss = 4.1299e-01, PNorm = 61.5132, GNorm = 1.4680, lr_0 = 2.7894e-04
Loss = 3.7669e-01, PNorm = 61.5173, GNorm = 1.7225, lr_0 = 2.7875e-04
Loss = 3.8403e-01, PNorm = 61.5196, GNorm = 1.1457, lr_0 = 2.7855e-04
Loss = 3.8305e-01, PNorm = 61.5196, GNorm = 1.5826, lr_0 = 2.7836e-04
Loss = 3.4794e-01, PNorm = 61.5226, GNorm = 1.3002, lr_0 = 2.7817e-04
Loss = 3.9064e-01, PNorm = 61.5234, GNorm = 1.4791, lr_0 = 2.7798e-04
Loss = 3.7380e-01, PNorm = 61.5234, GNorm = 1.3205, lr_0 = 2.7779e-04
Loss = 3.2931e-01, PNorm = 61.5301, GNorm = 1.4794, lr_0 = 2.7760e-04
Loss = 3.2360e-01, PNorm = 61.5311, GNorm = 1.4211, lr_0 = 2.7741e-04
Loss = 4.2722e-01, PNorm = 61.5328, GNorm = 1.2301, lr_0 = 2.7722e-04
Loss = 3.5768e-01, PNorm = 61.5400, GNorm = 1.8004, lr_0 = 2.7703e-04
Loss = 3.6862e-01, PNorm = 61.5391, GNorm = 2.2758, lr_0 = 2.7684e-04
Loss = 3.2410e-01, PNorm = 61.5427, GNorm = 1.1297, lr_0 = 2.7665e-04
Loss = 3.8135e-01, PNorm = 61.5459, GNorm = 1.0369, lr_0 = 2.7646e-04
Loss = 3.7045e-01, PNorm = 61.5467, GNorm = 1.6207, lr_0 = 2.7627e-04
Loss = 3.9832e-01, PNorm = 61.5476, GNorm = 1.7346, lr_0 = 2.7608e-04
Loss = 3.6590e-01, PNorm = 61.5483, GNorm = 1.0700, lr_0 = 2.7590e-04
Loss = 3.8827e-01, PNorm = 61.5566, GNorm = 1.6566, lr_0 = 2.7571e-04
Loss = 3.6710e-01, PNorm = 61.5600, GNorm = 1.5053, lr_0 = 2.7552e-04
Loss = 4.5575e-01, PNorm = 61.5635, GNorm = 1.6745, lr_0 = 2.7533e-04
Loss = 3.8082e-01, PNorm = 61.5715, GNorm = 1.3564, lr_0 = 2.7514e-04
Loss = 3.4394e-01, PNorm = 61.5729, GNorm = 1.3333, lr_0 = 2.7495e-04
Loss = 3.7565e-01, PNorm = 61.5758, GNorm = 1.3297, lr_0 = 2.7476e-04
Loss = 3.6309e-01, PNorm = 61.5770, GNorm = 1.3247, lr_0 = 2.7457e-04
Loss = 3.8718e-01, PNorm = 61.5791, GNorm = 1.2873, lr_0 = 2.7439e-04
Loss = 3.7276e-01, PNorm = 61.5832, GNorm = 1.2027, lr_0 = 2.7420e-04
Loss = 3.5878e-01, PNorm = 61.5848, GNorm = 1.2409, lr_0 = 2.7401e-04
Loss = 3.1997e-01, PNorm = 61.5864, GNorm = 1.2656, lr_0 = 2.7382e-04
Loss = 3.6146e-01, PNorm = 61.5896, GNorm = 1.5966, lr_0 = 2.7364e-04
Loss = 4.2114e-01, PNorm = 61.5919, GNorm = 1.5414, lr_0 = 2.7345e-04
Loss = 4.5011e-01, PNorm = 61.5973, GNorm = 2.8851, lr_0 = 2.7326e-04
Loss = 4.0225e-01, PNorm = 61.5985, GNorm = 1.8720, lr_0 = 2.7307e-04
Loss = 4.0203e-01, PNorm = 61.6023, GNorm = 1.2508, lr_0 = 2.7289e-04
Loss = 4.2357e-01, PNorm = 61.6064, GNorm = 1.4526, lr_0 = 2.7270e-04
Loss = 4.3493e-01, PNorm = 61.6137, GNorm = 1.4897, lr_0 = 2.7251e-04
Loss = 3.6236e-01, PNorm = 61.6164, GNorm = 1.3870, lr_0 = 2.7233e-04
Loss = 4.1086e-01, PNorm = 61.6203, GNorm = 1.7166, lr_0 = 2.7214e-04
Loss = 3.3224e-01, PNorm = 61.6192, GNorm = 1.5615, lr_0 = 2.7195e-04
Loss = 4.0060e-01, PNorm = 61.6182, GNorm = 1.8967, lr_0 = 2.7177e-04
Loss = 3.8290e-01, PNorm = 61.6221, GNorm = 1.4804, lr_0 = 2.7158e-04
Loss = 3.7864e-01, PNorm = 61.6265, GNorm = 0.9307, lr_0 = 2.7139e-04
Loss = 4.2153e-01, PNorm = 61.6298, GNorm = 1.1754, lr_0 = 2.7121e-04
Loss = 4.0551e-01, PNorm = 61.6328, GNorm = 1.4025, lr_0 = 2.7102e-04
Loss = 3.4754e-01, PNorm = 61.6353, GNorm = 1.4418, lr_0 = 2.7084e-04
Loss = 3.5741e-01, PNorm = 61.6392, GNorm = 1.7933, lr_0 = 2.7065e-04
Loss = 4.0045e-01, PNorm = 61.6441, GNorm = 1.5231, lr_0 = 2.7047e-04
Loss = 3.7827e-01, PNorm = 61.6504, GNorm = 2.4385, lr_0 = 2.7028e-04
Loss = 4.1078e-01, PNorm = 61.6490, GNorm = 1.3297, lr_0 = 2.7010e-04
Loss = 4.0826e-01, PNorm = 61.6540, GNorm = 1.3209, lr_0 = 2.6991e-04
Loss = 3.9823e-01, PNorm = 61.6554, GNorm = 1.9454, lr_0 = 2.6973e-04
Loss = 3.6294e-01, PNorm = 61.6617, GNorm = 1.9732, lr_0 = 2.6954e-04
Loss = 3.9722e-01, PNorm = 61.6640, GNorm = 1.5313, lr_0 = 2.6936e-04
Loss = 4.1587e-01, PNorm = 61.6688, GNorm = 1.2074, lr_0 = 2.6917e-04
Loss = 3.6085e-01, PNorm = 61.6696, GNorm = 1.5651, lr_0 = 2.6899e-04
Loss = 4.0261e-01, PNorm = 61.6713, GNorm = 1.0504, lr_0 = 2.6880e-04
Loss = 3.6644e-01, PNorm = 61.6752, GNorm = 1.1554, lr_0 = 2.6862e-04
Loss = 4.1604e-01, PNorm = 61.6796, GNorm = 0.9639, lr_0 = 2.6844e-04
Loss = 3.7006e-01, PNorm = 61.6839, GNorm = 1.3916, lr_0 = 2.6825e-04
Validation mae = 0.112251
Epoch 18
Loss = 4.0844e-01, PNorm = 61.6858, GNorm = 2.2387, lr_0 = 2.6807e-04
Loss = 3.1586e-01, PNorm = 61.6913, GNorm = 1.1404, lr_0 = 2.6788e-04
Loss = 3.5382e-01, PNorm = 61.6962, GNorm = 1.4879, lr_0 = 2.6770e-04
Loss = 3.7802e-01, PNorm = 61.7006, GNorm = 1.8550, lr_0 = 2.6752e-04
Loss = 3.2563e-01, PNorm = 61.7073, GNorm = 1.4890, lr_0 = 2.6733e-04
Loss = 3.2673e-01, PNorm = 61.7094, GNorm = 1.4182, lr_0 = 2.6715e-04
Loss = 3.3656e-01, PNorm = 61.7098, GNorm = 1.7124, lr_0 = 2.6697e-04
Loss = 3.5454e-01, PNorm = 61.7131, GNorm = 1.0592, lr_0 = 2.6678e-04
Loss = 3.9373e-01, PNorm = 61.7163, GNorm = 1.1672, lr_0 = 2.6660e-04
Loss = 3.6077e-01, PNorm = 61.7204, GNorm = 1.3226, lr_0 = 2.6642e-04
Loss = 3.7489e-01, PNorm = 61.7235, GNorm = 1.3173, lr_0 = 2.6624e-04
Loss = 3.7739e-01, PNorm = 61.7283, GNorm = 1.0555, lr_0 = 2.6605e-04
Loss = 3.9113e-01, PNorm = 61.7321, GNorm = 1.3641, lr_0 = 2.6587e-04
Loss = 3.4026e-01, PNorm = 61.7348, GNorm = 1.4413, lr_0 = 2.6569e-04
Loss = 3.9599e-01, PNorm = 61.7385, GNorm = 1.6321, lr_0 = 2.6551e-04
Loss = 3.7329e-01, PNorm = 61.7400, GNorm = 1.7690, lr_0 = 2.6533e-04
Loss = 3.6687e-01, PNorm = 61.7457, GNorm = 1.3312, lr_0 = 2.6514e-04
Loss = 3.5351e-01, PNorm = 61.7490, GNorm = 2.2381, lr_0 = 2.6496e-04
Loss = 3.6454e-01, PNorm = 61.7465, GNorm = 1.2823, lr_0 = 2.6478e-04
Loss = 3.9196e-01, PNorm = 61.7499, GNorm = 1.1980, lr_0 = 2.6460e-04
Loss = 4.3176e-01, PNorm = 61.7559, GNorm = 1.5631, lr_0 = 2.6442e-04
Loss = 3.3982e-01, PNorm = 61.7589, GNorm = 1.0191, lr_0 = 2.6424e-04
Loss = 3.9148e-01, PNorm = 61.7619, GNorm = 1.8262, lr_0 = 2.6406e-04
Loss = 3.6473e-01, PNorm = 61.7662, GNorm = 1.3817, lr_0 = 2.6388e-04
Loss = 3.6616e-01, PNorm = 61.7705, GNorm = 1.2417, lr_0 = 2.6369e-04
Loss = 3.7907e-01, PNorm = 61.7707, GNorm = 1.2742, lr_0 = 2.6351e-04
Loss = 4.0470e-01, PNorm = 61.7722, GNorm = 1.4984, lr_0 = 2.6333e-04
Loss = 3.8379e-01, PNorm = 61.7740, GNorm = 1.7505, lr_0 = 2.6315e-04
Loss = 3.7008e-01, PNorm = 61.7785, GNorm = 1.7776, lr_0 = 2.6297e-04
Loss = 3.5816e-01, PNorm = 61.7829, GNorm = 1.5448, lr_0 = 2.6279e-04
Loss = 3.5096e-01, PNorm = 61.7862, GNorm = 1.4387, lr_0 = 2.6261e-04
Loss = 3.0533e-01, PNorm = 61.7886, GNorm = 1.3486, lr_0 = 2.6243e-04
Loss = 3.3570e-01, PNorm = 61.7889, GNorm = 1.3540, lr_0 = 2.6225e-04
Loss = 3.3515e-01, PNorm = 61.7888, GNorm = 1.6452, lr_0 = 2.6207e-04
Loss = 3.6992e-01, PNorm = 61.7950, GNorm = 1.1925, lr_0 = 2.6189e-04
Loss = 3.6638e-01, PNorm = 61.7986, GNorm = 1.5554, lr_0 = 2.6171e-04
Loss = 3.7187e-01, PNorm = 61.8035, GNorm = 1.9424, lr_0 = 2.6153e-04
Loss = 3.4436e-01, PNorm = 61.8089, GNorm = 1.3108, lr_0 = 2.6136e-04
Loss = 3.8576e-01, PNorm = 61.8128, GNorm = 1.3527, lr_0 = 2.6118e-04
Loss = 4.3654e-01, PNorm = 61.8185, GNorm = 1.2676, lr_0 = 2.6100e-04
Loss = 4.0231e-01, PNorm = 61.8215, GNorm = 1.5758, lr_0 = 2.6082e-04
Loss = 4.1745e-01, PNorm = 61.8233, GNorm = 1.4032, lr_0 = 2.6064e-04
Loss = 3.2080e-01, PNorm = 61.8292, GNorm = 1.7338, lr_0 = 2.6046e-04
Loss = 3.9425e-01, PNorm = 61.8283, GNorm = 3.1276, lr_0 = 2.6028e-04
Loss = 3.8716e-01, PNorm = 61.8310, GNorm = 1.3253, lr_0 = 2.6011e-04
Loss = 4.0964e-01, PNorm = 61.8334, GNorm = 1.0336, lr_0 = 2.5993e-04
Loss = 2.8441e-01, PNorm = 61.8384, GNorm = 1.4669, lr_0 = 2.5975e-04
Loss = 3.5006e-01, PNorm = 61.8426, GNorm = 1.3299, lr_0 = 2.5957e-04
Loss = 3.4886e-01, PNorm = 61.8432, GNorm = 1.5060, lr_0 = 2.5939e-04
Loss = 4.0003e-01, PNorm = 61.8493, GNorm = 1.4260, lr_0 = 2.5922e-04
Loss = 3.6780e-01, PNorm = 61.8568, GNorm = 1.2568, lr_0 = 2.5904e-04
Loss = 4.1828e-01, PNorm = 61.8575, GNorm = 1.5191, lr_0 = 2.5886e-04
Loss = 3.6317e-01, PNorm = 61.8602, GNorm = 1.7307, lr_0 = 2.5868e-04
Loss = 4.2216e-01, PNorm = 61.8637, GNorm = 1.4212, lr_0 = 2.5851e-04
Loss = 3.8466e-01, PNorm = 61.8702, GNorm = 1.2761, lr_0 = 2.5833e-04
Loss = 3.5814e-01, PNorm = 61.8724, GNorm = 1.2529, lr_0 = 2.5815e-04
Loss = 3.6782e-01, PNorm = 61.8720, GNorm = 1.6579, lr_0 = 2.5797e-04
Loss = 3.5736e-01, PNorm = 61.8797, GNorm = 2.0012, lr_0 = 2.5780e-04
Loss = 3.3507e-01, PNorm = 61.8820, GNorm = 2.1127, lr_0 = 2.5762e-04
Loss = 3.5207e-01, PNorm = 61.8824, GNorm = 2.0610, lr_0 = 2.5745e-04
Loss = 4.2373e-01, PNorm = 61.8886, GNorm = 1.4700, lr_0 = 2.5727e-04
Loss = 3.6441e-01, PNorm = 61.8894, GNorm = 1.5533, lr_0 = 2.5709e-04
Loss = 3.9208e-01, PNorm = 61.8949, GNorm = 1.6006, lr_0 = 2.5692e-04
Loss = 3.8542e-01, PNorm = 61.8975, GNorm = 1.6080, lr_0 = 2.5674e-04
Loss = 3.6183e-01, PNorm = 61.9006, GNorm = 1.2603, lr_0 = 2.5656e-04
Loss = 3.6264e-01, PNorm = 61.9030, GNorm = 1.3364, lr_0 = 2.5639e-04
Loss = 3.3166e-01, PNorm = 61.9058, GNorm = 1.0104, lr_0 = 2.5621e-04
Loss = 3.3212e-01, PNorm = 61.9082, GNorm = 1.4178, lr_0 = 2.5604e-04
Loss = 3.8872e-01, PNorm = 61.9104, GNorm = 1.6768, lr_0 = 2.5586e-04
Loss = 4.2228e-01, PNorm = 61.9145, GNorm = 1.8324, lr_0 = 2.5569e-04
Loss = 3.4227e-01, PNorm = 61.9148, GNorm = 1.5307, lr_0 = 2.5551e-04
Loss = 3.0916e-01, PNorm = 61.9163, GNorm = 1.4536, lr_0 = 2.5534e-04
Loss = 4.1805e-01, PNorm = 61.9186, GNorm = 1.3782, lr_0 = 2.5516e-04
Loss = 3.7114e-01, PNorm = 61.9224, GNorm = 1.6381, lr_0 = 2.5499e-04
Loss = 3.9440e-01, PNorm = 61.9225, GNorm = 1.3763, lr_0 = 2.5481e-04
Loss = 3.9791e-01, PNorm = 61.9258, GNorm = 1.1853, lr_0 = 2.5464e-04
Loss = 3.6246e-01, PNorm = 61.9301, GNorm = 1.3111, lr_0 = 2.5446e-04
Loss = 3.2763e-01, PNorm = 61.9337, GNorm = 0.9920, lr_0 = 2.5429e-04
Loss = 3.3013e-01, PNorm = 61.9380, GNorm = 1.4880, lr_0 = 2.5411e-04
Loss = 4.0133e-01, PNorm = 61.9404, GNorm = 1.5178, lr_0 = 2.5394e-04
Loss = 3.9133e-01, PNorm = 61.9436, GNorm = 1.2320, lr_0 = 2.5377e-04
Loss = 3.7133e-01, PNorm = 61.9485, GNorm = 1.1465, lr_0 = 2.5359e-04
Loss = 4.2744e-01, PNorm = 61.9542, GNorm = 1.4378, lr_0 = 2.5342e-04
Loss = 3.5083e-01, PNorm = 61.9551, GNorm = 1.4914, lr_0 = 2.5325e-04
Loss = 3.2414e-01, PNorm = 61.9595, GNorm = 2.0231, lr_0 = 2.5307e-04
Loss = 3.5835e-01, PNorm = 61.9622, GNorm = 1.4878, lr_0 = 2.5290e-04
Loss = 4.2001e-01, PNorm = 61.9639, GNorm = 1.8412, lr_0 = 2.5273e-04
Loss = 3.5246e-01, PNorm = 61.9674, GNorm = 1.1571, lr_0 = 2.5255e-04
Loss = 3.8464e-01, PNorm = 61.9679, GNorm = 1.0174, lr_0 = 2.5238e-04
Loss = 3.5991e-01, PNorm = 61.9739, GNorm = 1.2412, lr_0 = 2.5221e-04
Loss = 3.8520e-01, PNorm = 61.9785, GNorm = 1.7033, lr_0 = 2.5203e-04
Loss = 3.5824e-01, PNorm = 61.9801, GNorm = 1.1252, lr_0 = 2.5186e-04
Loss = 3.8057e-01, PNorm = 61.9835, GNorm = 1.2850, lr_0 = 2.5169e-04
Loss = 3.7504e-01, PNorm = 61.9860, GNorm = 1.4723, lr_0 = 2.5152e-04
Loss = 3.6561e-01, PNorm = 61.9917, GNorm = 1.1089, lr_0 = 2.5134e-04
Loss = 3.2400e-01, PNorm = 61.9949, GNorm = 1.2596, lr_0 = 2.5117e-04
Loss = 3.5236e-01, PNorm = 61.9980, GNorm = 1.2775, lr_0 = 2.5100e-04
Loss = 3.9592e-01, PNorm = 62.0022, GNorm = 1.1030, lr_0 = 2.5083e-04
Loss = 3.5261e-01, PNorm = 62.0051, GNorm = 0.8573, lr_0 = 2.5066e-04
Loss = 3.8317e-01, PNorm = 62.0061, GNorm = 1.5049, lr_0 = 2.5048e-04
Loss = 3.7814e-01, PNorm = 62.0109, GNorm = 1.6300, lr_0 = 2.5031e-04
Loss = 3.1460e-01, PNorm = 62.0138, GNorm = 1.0480, lr_0 = 2.5014e-04
Loss = 3.5576e-01, PNorm = 62.0130, GNorm = 1.4095, lr_0 = 2.4997e-04
Loss = 4.0983e-01, PNorm = 62.0140, GNorm = 1.2399, lr_0 = 2.4980e-04
Loss = 3.8735e-01, PNorm = 62.0139, GNorm = 1.0591, lr_0 = 2.4963e-04
Loss = 3.4216e-01, PNorm = 62.0134, GNorm = 1.5780, lr_0 = 2.4946e-04
Loss = 4.5844e-01, PNorm = 62.0191, GNorm = 1.5691, lr_0 = 2.4929e-04
Loss = 4.2836e-01, PNorm = 62.0246, GNorm = 1.9051, lr_0 = 2.4911e-04
Loss = 3.8343e-01, PNorm = 62.0290, GNorm = 1.5600, lr_0 = 2.4894e-04
Loss = 3.7274e-01, PNorm = 62.0300, GNorm = 1.5804, lr_0 = 2.4877e-04
Loss = 3.9583e-01, PNorm = 62.0314, GNorm = 0.9669, lr_0 = 2.4860e-04
Loss = 4.5718e-01, PNorm = 62.0374, GNorm = 1.7014, lr_0 = 2.4843e-04
Loss = 3.1768e-01, PNorm = 62.0433, GNorm = 1.3709, lr_0 = 2.4826e-04
Loss = 3.4426e-01, PNorm = 62.0461, GNorm = 1.2778, lr_0 = 2.4809e-04
Loss = 3.7999e-01, PNorm = 62.0495, GNorm = 1.1638, lr_0 = 2.4792e-04
Loss = 4.0581e-01, PNorm = 62.0531, GNorm = 1.4787, lr_0 = 2.4775e-04
Loss = 3.8571e-01, PNorm = 62.0585, GNorm = 1.8429, lr_0 = 2.4758e-04
Loss = 3.6051e-01, PNorm = 62.0596, GNorm = 1.3773, lr_0 = 2.4741e-04
Loss = 3.5244e-01, PNorm = 62.0624, GNorm = 1.5765, lr_0 = 2.4724e-04
Loss = 3.8761e-01, PNorm = 62.0641, GNorm = 1.3638, lr_0 = 2.4707e-04
Validation mae = 0.112419
Epoch 19
Loss = 3.5179e-01, PNorm = 62.0682, GNorm = 1.4146, lr_0 = 2.4690e-04
Loss = 3.7687e-01, PNorm = 62.0671, GNorm = 1.5227, lr_0 = 2.4674e-04
Loss = 3.2763e-01, PNorm = 62.0706, GNorm = 1.4005, lr_0 = 2.4657e-04
Loss = 3.2001e-01, PNorm = 62.0739, GNorm = 1.1327, lr_0 = 2.4640e-04
Loss = 3.5323e-01, PNorm = 62.0749, GNorm = 0.9108, lr_0 = 2.4623e-04
Loss = 3.5719e-01, PNorm = 62.0769, GNorm = 1.1156, lr_0 = 2.4606e-04
Loss = 3.6063e-01, PNorm = 62.0761, GNorm = 1.2991, lr_0 = 2.4589e-04
Loss = 3.6558e-01, PNorm = 62.0811, GNorm = 1.6053, lr_0 = 2.4572e-04
Loss = 3.5536e-01, PNorm = 62.0850, GNorm = 1.2112, lr_0 = 2.4556e-04
Loss = 3.4245e-01, PNorm = 62.0873, GNorm = 1.8359, lr_0 = 2.4539e-04
Loss = 3.8259e-01, PNorm = 62.0909, GNorm = 1.7610, lr_0 = 2.4522e-04
Loss = 4.3108e-01, PNorm = 62.0943, GNorm = 1.5509, lr_0 = 2.4505e-04
Loss = 3.9239e-01, PNorm = 62.0970, GNorm = 1.7173, lr_0 = 2.4488e-04
Loss = 3.5629e-01, PNorm = 62.1016, GNorm = 1.7147, lr_0 = 2.4472e-04
Loss = 4.1273e-01, PNorm = 62.1017, GNorm = 1.9619, lr_0 = 2.4455e-04
Loss = 3.7166e-01, PNorm = 62.1063, GNorm = 1.6738, lr_0 = 2.4438e-04
Loss = 3.4107e-01, PNorm = 62.1093, GNorm = 1.2489, lr_0 = 2.4421e-04
Loss = 3.7669e-01, PNorm = 62.1144, GNorm = 1.9773, lr_0 = 2.4405e-04
Loss = 4.1824e-01, PNorm = 62.1173, GNorm = 1.5162, lr_0 = 2.4388e-04
Loss = 4.2537e-01, PNorm = 62.1210, GNorm = 2.3011, lr_0 = 2.4371e-04
Loss = 4.0861e-01, PNorm = 62.1255, GNorm = 1.4326, lr_0 = 2.4354e-04
Loss = 4.0007e-01, PNorm = 62.1315, GNorm = 2.0261, lr_0 = 2.4338e-04
Loss = 3.6843e-01, PNorm = 62.1360, GNorm = 1.4287, lr_0 = 2.4321e-04
Loss = 4.1650e-01, PNorm = 62.1377, GNorm = 1.4978, lr_0 = 2.4304e-04
Loss = 3.4961e-01, PNorm = 62.1419, GNorm = 1.6150, lr_0 = 2.4288e-04
Loss = 3.8923e-01, PNorm = 62.1457, GNorm = 1.6696, lr_0 = 2.4271e-04
Loss = 3.8314e-01, PNorm = 62.1491, GNorm = 1.1029, lr_0 = 2.4254e-04
Loss = 3.8794e-01, PNorm = 62.1542, GNorm = 1.0877, lr_0 = 2.4238e-04
Loss = 3.6496e-01, PNorm = 62.1559, GNorm = 1.5856, lr_0 = 2.4221e-04
Loss = 3.5103e-01, PNorm = 62.1592, GNorm = 1.3177, lr_0 = 2.4205e-04
Loss = 3.3544e-01, PNorm = 62.1623, GNorm = 1.3659, lr_0 = 2.4188e-04
Loss = 3.8851e-01, PNorm = 62.1651, GNorm = 1.7534, lr_0 = 2.4171e-04
Loss = 3.6256e-01, PNorm = 62.1682, GNorm = 1.1556, lr_0 = 2.4155e-04
Loss = 3.3585e-01, PNorm = 62.1691, GNorm = 1.1887, lr_0 = 2.4138e-04
Loss = 4.0343e-01, PNorm = 62.1721, GNorm = 1.5277, lr_0 = 2.4122e-04
Loss = 3.4157e-01, PNorm = 62.1763, GNorm = 1.2731, lr_0 = 2.4105e-04
Loss = 3.3085e-01, PNorm = 62.1774, GNorm = 1.5805, lr_0 = 2.4089e-04
Loss = 3.3229e-01, PNorm = 62.1803, GNorm = 1.6642, lr_0 = 2.4072e-04
Loss = 3.5054e-01, PNorm = 62.1806, GNorm = 1.6540, lr_0 = 2.4056e-04
Loss = 3.5917e-01, PNorm = 62.1812, GNorm = 0.9227, lr_0 = 2.4039e-04
Loss = 3.3143e-01, PNorm = 62.1828, GNorm = 1.3886, lr_0 = 2.4023e-04
Loss = 3.4476e-01, PNorm = 62.1874, GNorm = 1.1784, lr_0 = 2.4006e-04
Loss = 4.2094e-01, PNorm = 62.1916, GNorm = 1.5746, lr_0 = 2.3990e-04
Loss = 3.8235e-01, PNorm = 62.1971, GNorm = 1.5475, lr_0 = 2.3974e-04
Loss = 4.1036e-01, PNorm = 62.2008, GNorm = 1.7162, lr_0 = 2.3957e-04
Loss = 3.3673e-01, PNorm = 62.2001, GNorm = 1.3132, lr_0 = 2.3941e-04
Loss = 3.7636e-01, PNorm = 62.2029, GNorm = 1.5618, lr_0 = 2.3924e-04
Loss = 3.9379e-01, PNorm = 62.2043, GNorm = 1.8844, lr_0 = 2.3908e-04
Loss = 3.6919e-01, PNorm = 62.2097, GNorm = 1.7304, lr_0 = 2.3892e-04
Loss = 3.7775e-01, PNorm = 62.2129, GNorm = 1.3943, lr_0 = 2.3875e-04
Loss = 3.6897e-01, PNorm = 62.2156, GNorm = 1.0824, lr_0 = 2.3859e-04
Loss = 3.3874e-01, PNorm = 62.2172, GNorm = 1.3770, lr_0 = 2.3842e-04
Loss = 3.3450e-01, PNorm = 62.2166, GNorm = 2.0904, lr_0 = 2.3826e-04
Loss = 3.6761e-01, PNorm = 62.2201, GNorm = 1.6008, lr_0 = 2.3810e-04
Loss = 3.7082e-01, PNorm = 62.2259, GNorm = 2.5246, lr_0 = 2.3794e-04
Loss = 3.0334e-01, PNorm = 62.2268, GNorm = 1.2252, lr_0 = 2.3777e-04
Loss = 3.6906e-01, PNorm = 62.2278, GNorm = 1.2448, lr_0 = 2.3761e-04
Loss = 3.3314e-01, PNorm = 62.2306, GNorm = 2.0135, lr_0 = 2.3745e-04
Loss = 3.1531e-01, PNorm = 62.2313, GNorm = 1.2906, lr_0 = 2.3728e-04
Loss = 3.6075e-01, PNorm = 62.2344, GNorm = 1.3593, lr_0 = 2.3712e-04
Loss = 3.9648e-01, PNorm = 62.2363, GNorm = 1.4137, lr_0 = 2.3696e-04
Loss = 3.9278e-01, PNorm = 62.2384, GNorm = 1.8247, lr_0 = 2.3680e-04
Loss = 3.9642e-01, PNorm = 62.2445, GNorm = 1.8262, lr_0 = 2.3663e-04
Loss = 3.6442e-01, PNorm = 62.2455, GNorm = 0.9884, lr_0 = 2.3647e-04
Loss = 3.8501e-01, PNorm = 62.2470, GNorm = 1.8444, lr_0 = 2.3631e-04
Loss = 3.8443e-01, PNorm = 62.2488, GNorm = 1.5497, lr_0 = 2.3615e-04
Loss = 3.5324e-01, PNorm = 62.2487, GNorm = 1.6164, lr_0 = 2.3599e-04
Loss = 3.8676e-01, PNorm = 62.2512, GNorm = 2.2474, lr_0 = 2.3582e-04
Loss = 3.8223e-01, PNorm = 62.2544, GNorm = 1.9515, lr_0 = 2.3566e-04
Loss = 3.4728e-01, PNorm = 62.2546, GNorm = 1.5920, lr_0 = 2.3550e-04
Loss = 3.3455e-01, PNorm = 62.2532, GNorm = 1.9527, lr_0 = 2.3534e-04
Loss = 4.0497e-01, PNorm = 62.2570, GNorm = 0.9828, lr_0 = 2.3518e-04
Loss = 3.6571e-01, PNorm = 62.2562, GNorm = 1.1191, lr_0 = 2.3502e-04
Loss = 3.6089e-01, PNorm = 62.2574, GNorm = 1.2822, lr_0 = 2.3486e-04
Loss = 3.5769e-01, PNorm = 62.2601, GNorm = 1.1718, lr_0 = 2.3470e-04
Loss = 3.4229e-01, PNorm = 62.2637, GNorm = 1.1602, lr_0 = 2.3454e-04
Loss = 3.3591e-01, PNorm = 62.2655, GNorm = 1.1188, lr_0 = 2.3437e-04
Loss = 4.1628e-01, PNorm = 62.2704, GNorm = 1.3563, lr_0 = 2.3421e-04
Loss = 4.3919e-01, PNorm = 62.2701, GNorm = 1.7557, lr_0 = 2.3405e-04
Loss = 4.2022e-01, PNorm = 62.2724, GNorm = 1.5585, lr_0 = 2.3389e-04
Loss = 3.1193e-01, PNorm = 62.2797, GNorm = 2.0818, lr_0 = 2.3373e-04
Loss = 3.6015e-01, PNorm = 62.2846, GNorm = 1.4240, lr_0 = 2.3357e-04
Loss = 3.9005e-01, PNorm = 62.2891, GNorm = 1.4339, lr_0 = 2.3341e-04
Loss = 4.3957e-01, PNorm = 62.2948, GNorm = 1.9172, lr_0 = 2.3325e-04
Loss = 4.1313e-01, PNorm = 62.3012, GNorm = 1.4152, lr_0 = 2.3309e-04
Loss = 3.6625e-01, PNorm = 62.3072, GNorm = 1.4074, lr_0 = 2.3293e-04
Loss = 3.9927e-01, PNorm = 62.3101, GNorm = 1.0496, lr_0 = 2.3277e-04
Loss = 3.9694e-01, PNorm = 62.3115, GNorm = 1.6170, lr_0 = 2.3261e-04
Loss = 3.5651e-01, PNorm = 62.3133, GNorm = 1.4384, lr_0 = 2.3246e-04
Loss = 3.9038e-01, PNorm = 62.3155, GNorm = 1.5306, lr_0 = 2.3230e-04
Loss = 3.5027e-01, PNorm = 62.3185, GNorm = 1.4553, lr_0 = 2.3214e-04
Loss = 4.0865e-01, PNorm = 62.3204, GNorm = 1.3829, lr_0 = 2.3198e-04
Loss = 3.3334e-01, PNorm = 62.3225, GNorm = 1.4308, lr_0 = 2.3182e-04
Loss = 3.5767e-01, PNorm = 62.3261, GNorm = 1.5278, lr_0 = 2.3166e-04
Loss = 3.4089e-01, PNorm = 62.3303, GNorm = 1.2839, lr_0 = 2.3150e-04
Loss = 4.0505e-01, PNorm = 62.3328, GNorm = 1.6212, lr_0 = 2.3134e-04
Loss = 3.5380e-01, PNorm = 62.3357, GNorm = 1.3391, lr_0 = 2.3118e-04
Loss = 3.4741e-01, PNorm = 62.3380, GNorm = 1.3419, lr_0 = 2.3103e-04
Loss = 3.6008e-01, PNorm = 62.3411, GNorm = 1.0012, lr_0 = 2.3087e-04
Loss = 3.1810e-01, PNorm = 62.3419, GNorm = 1.4461, lr_0 = 2.3071e-04
Loss = 3.3483e-01, PNorm = 62.3439, GNorm = 2.0514, lr_0 = 2.3055e-04
Loss = 3.1124e-01, PNorm = 62.3458, GNorm = 1.0942, lr_0 = 2.3039e-04
Loss = 3.4216e-01, PNorm = 62.3471, GNorm = 1.3011, lr_0 = 2.3024e-04
Loss = 3.6923e-01, PNorm = 62.3495, GNorm = 1.1219, lr_0 = 2.3008e-04
Loss = 3.4315e-01, PNorm = 62.3530, GNorm = 1.5715, lr_0 = 2.2992e-04
Loss = 3.7015e-01, PNorm = 62.3535, GNorm = 1.5028, lr_0 = 2.2976e-04
Loss = 3.9525e-01, PNorm = 62.3572, GNorm = 1.8276, lr_0 = 2.2961e-04
Loss = 3.4937e-01, PNorm = 62.3612, GNorm = 1.3669, lr_0 = 2.2945e-04
Loss = 4.1290e-01, PNorm = 62.3615, GNorm = 1.5117, lr_0 = 2.2929e-04
Loss = 3.2243e-01, PNorm = 62.3650, GNorm = 1.4300, lr_0 = 2.2913e-04
Loss = 3.7750e-01, PNorm = 62.3662, GNorm = 1.2890, lr_0 = 2.2898e-04
Loss = 3.2861e-01, PNorm = 62.3685, GNorm = 1.0111, lr_0 = 2.2882e-04
Loss = 3.5467e-01, PNorm = 62.3694, GNorm = 1.6555, lr_0 = 2.2866e-04
Loss = 3.6573e-01, PNorm = 62.3706, GNorm = 1.1183, lr_0 = 2.2851e-04
Loss = 3.5753e-01, PNorm = 62.3718, GNorm = 1.6835, lr_0 = 2.2835e-04
Loss = 3.7952e-01, PNorm = 62.3733, GNorm = 1.2720, lr_0 = 2.2819e-04
Loss = 3.9467e-01, PNorm = 62.3744, GNorm = 1.2785, lr_0 = 2.2804e-04
Loss = 3.7532e-01, PNorm = 62.3777, GNorm = 1.2331, lr_0 = 2.2788e-04
Loss = 3.9503e-01, PNorm = 62.3805, GNorm = 1.2242, lr_0 = 2.2773e-04
Loss = 3.7891e-01, PNorm = 62.3846, GNorm = 1.3103, lr_0 = 2.2757e-04
Validation mae = 0.111562
Epoch 20
Loss = 3.9461e-01, PNorm = 62.3876, GNorm = 1.5885, lr_0 = 2.2741e-04
Loss = 3.7752e-01, PNorm = 62.3905, GNorm = 1.1280, lr_0 = 2.2726e-04
Loss = 3.6157e-01, PNorm = 62.3895, GNorm = 1.7820, lr_0 = 2.2710e-04
Loss = 3.7659e-01, PNorm = 62.3883, GNorm = 1.7300, lr_0 = 2.2695e-04
Loss = 3.7070e-01, PNorm = 62.3924, GNorm = 1.6600, lr_0 = 2.2679e-04
Loss = 3.2189e-01, PNorm = 62.3961, GNorm = 1.6865, lr_0 = 2.2664e-04
Loss = 3.4383e-01, PNorm = 62.4023, GNorm = 1.5880, lr_0 = 2.2648e-04
Loss = 3.5255e-01, PNorm = 62.4034, GNorm = 1.3532, lr_0 = 2.2632e-04
Loss = 3.2219e-01, PNorm = 62.4052, GNorm = 1.2062, lr_0 = 2.2617e-04
Loss = 3.2680e-01, PNorm = 62.4057, GNorm = 0.9675, lr_0 = 2.2601e-04
Loss = 3.4837e-01, PNorm = 62.4088, GNorm = 1.6460, lr_0 = 2.2586e-04
Loss = 3.5969e-01, PNorm = 62.4151, GNorm = 1.9727, lr_0 = 2.2571e-04
Loss = 3.7154e-01, PNorm = 62.4206, GNorm = 1.8495, lr_0 = 2.2555e-04
Loss = 3.5618e-01, PNorm = 62.4239, GNorm = 1.4283, lr_0 = 2.2540e-04
Loss = 3.0994e-01, PNorm = 62.4277, GNorm = 1.4913, lr_0 = 2.2524e-04
Loss = 3.5719e-01, PNorm = 62.4306, GNorm = 1.4397, lr_0 = 2.2509e-04
Loss = 3.6394e-01, PNorm = 62.4329, GNorm = 1.3802, lr_0 = 2.2493e-04
Loss = 4.0087e-01, PNorm = 62.4348, GNorm = 1.5939, lr_0 = 2.2478e-04
Loss = 3.8632e-01, PNorm = 62.4363, GNorm = 1.6997, lr_0 = 2.2463e-04
Loss = 3.8141e-01, PNorm = 62.4385, GNorm = 2.0070, lr_0 = 2.2447e-04
Loss = 3.5596e-01, PNorm = 62.4416, GNorm = 1.2345, lr_0 = 2.2432e-04
Loss = 3.3682e-01, PNorm = 62.4403, GNorm = 1.3435, lr_0 = 2.2416e-04
Loss = 3.9375e-01, PNorm = 62.4427, GNorm = 1.9003, lr_0 = 2.2401e-04
Loss = 3.8038e-01, PNorm = 62.4478, GNorm = 1.5814, lr_0 = 2.2386e-04
Loss = 3.4502e-01, PNorm = 62.4501, GNorm = 2.8918, lr_0 = 2.2370e-04
Loss = 3.1849e-01, PNorm = 62.4506, GNorm = 1.1644, lr_0 = 2.2355e-04
Loss = 3.5916e-01, PNorm = 62.4518, GNorm = 1.5318, lr_0 = 2.2340e-04
Loss = 3.2838e-01, PNorm = 62.4523, GNorm = 1.1239, lr_0 = 2.2324e-04
Loss = 4.2828e-01, PNorm = 62.4527, GNorm = 1.5983, lr_0 = 2.2309e-04
Loss = 3.8825e-01, PNorm = 62.4540, GNorm = 1.2041, lr_0 = 2.2294e-04
Loss = 3.5345e-01, PNorm = 62.4566, GNorm = 1.5419, lr_0 = 2.2279e-04
Loss = 3.1257e-01, PNorm = 62.4597, GNorm = 1.1469, lr_0 = 2.2263e-04
Loss = 3.6377e-01, PNorm = 62.4621, GNorm = 1.3043, lr_0 = 2.2248e-04
Loss = 3.7034e-01, PNorm = 62.4643, GNorm = 2.4805, lr_0 = 2.2233e-04
Loss = 4.0586e-01, PNorm = 62.4648, GNorm = 1.4553, lr_0 = 2.2218e-04
Loss = 3.8632e-01, PNorm = 62.4681, GNorm = 1.2226, lr_0 = 2.2202e-04
Loss = 3.6487e-01, PNorm = 62.4731, GNorm = 1.7101, lr_0 = 2.2187e-04
Loss = 3.5251e-01, PNorm = 62.4746, GNorm = 1.6190, lr_0 = 2.2172e-04
Loss = 3.8778e-01, PNorm = 62.4767, GNorm = 1.3627, lr_0 = 2.2157e-04
Loss = 3.6919e-01, PNorm = 62.4799, GNorm = 1.7248, lr_0 = 2.2142e-04
Loss = 3.7634e-01, PNorm = 62.4805, GNorm = 2.0795, lr_0 = 2.2126e-04
Loss = 3.8845e-01, PNorm = 62.4841, GNorm = 1.5905, lr_0 = 2.2111e-04
Loss = 3.3839e-01, PNorm = 62.4875, GNorm = 1.4983, lr_0 = 2.2096e-04
Loss = 3.9282e-01, PNorm = 62.4866, GNorm = 1.4411, lr_0 = 2.2081e-04
Loss = 3.7234e-01, PNorm = 62.4890, GNorm = 1.4782, lr_0 = 2.2066e-04
Loss = 4.0483e-01, PNorm = 62.4903, GNorm = 1.1693, lr_0 = 2.2051e-04
Loss = 3.3524e-01, PNorm = 62.4937, GNorm = 1.2223, lr_0 = 2.2036e-04
Loss = 3.9548e-01, PNorm = 62.4964, GNorm = 1.5539, lr_0 = 2.2021e-04
Loss = 3.3367e-01, PNorm = 62.4978, GNorm = 1.0378, lr_0 = 2.2005e-04
Loss = 3.5066e-01, PNorm = 62.4992, GNorm = 1.7778, lr_0 = 2.1990e-04
Loss = 3.6636e-01, PNorm = 62.5032, GNorm = 1.3058, lr_0 = 2.1975e-04
Loss = 3.7324e-01, PNorm = 62.5059, GNorm = 2.0801, lr_0 = 2.1960e-04
Loss = 3.6361e-01, PNorm = 62.5068, GNorm = 1.4013, lr_0 = 2.1945e-04
Loss = 3.6478e-01, PNorm = 62.5092, GNorm = 2.0803, lr_0 = 2.1930e-04
Loss = 3.3537e-01, PNorm = 62.5102, GNorm = 1.5170, lr_0 = 2.1915e-04
Loss = 3.0703e-01, PNorm = 62.5122, GNorm = 1.2974, lr_0 = 2.1900e-04
Loss = 3.4917e-01, PNorm = 62.5087, GNorm = 1.2426, lr_0 = 2.1885e-04
Loss = 3.9648e-01, PNorm = 62.5078, GNorm = 1.3983, lr_0 = 2.1870e-04
Loss = 3.7411e-01, PNorm = 62.5105, GNorm = 1.5328, lr_0 = 2.1855e-04
Loss = 3.8173e-01, PNorm = 62.5094, GNorm = 1.6607, lr_0 = 2.1840e-04
Loss = 4.1144e-01, PNorm = 62.5101, GNorm = 2.0652, lr_0 = 2.1825e-04
Loss = 3.4273e-01, PNorm = 62.5152, GNorm = 2.1621, lr_0 = 2.1810e-04
Loss = 3.6874e-01, PNorm = 62.5160, GNorm = 1.4636, lr_0 = 2.1795e-04
Loss = 3.7696e-01, PNorm = 62.5189, GNorm = 1.6465, lr_0 = 2.1780e-04
Loss = 3.5053e-01, PNorm = 62.5196, GNorm = 1.5125, lr_0 = 2.1765e-04
Loss = 3.5098e-01, PNorm = 62.5217, GNorm = 1.0755, lr_0 = 2.1751e-04
Loss = 3.8418e-01, PNorm = 62.5243, GNorm = 1.6464, lr_0 = 2.1736e-04
Loss = 3.8677e-01, PNorm = 62.5291, GNorm = 1.9231, lr_0 = 2.1721e-04
Loss = 3.5043e-01, PNorm = 62.5338, GNorm = 1.1402, lr_0 = 2.1706e-04
Loss = 4.1526e-01, PNorm = 62.5340, GNorm = 1.4655, lr_0 = 2.1691e-04
Loss = 3.5902e-01, PNorm = 62.5364, GNorm = 1.5691, lr_0 = 2.1676e-04
Loss = 3.5216e-01, PNorm = 62.5396, GNorm = 1.1185, lr_0 = 2.1661e-04
Loss = 3.2061e-01, PNorm = 62.5431, GNorm = 1.4617, lr_0 = 2.1646e-04
Loss = 3.6875e-01, PNorm = 62.5458, GNorm = 1.5938, lr_0 = 2.1632e-04
Loss = 3.6427e-01, PNorm = 62.5488, GNorm = 1.6762, lr_0 = 2.1617e-04
Loss = 3.4068e-01, PNorm = 62.5516, GNorm = 1.2225, lr_0 = 2.1602e-04
Loss = 3.7628e-01, PNorm = 62.5554, GNorm = 1.2237, lr_0 = 2.1587e-04
Loss = 3.5871e-01, PNorm = 62.5572, GNorm = 1.2911, lr_0 = 2.1572e-04
Loss = 3.8225e-01, PNorm = 62.5611, GNorm = 1.2650, lr_0 = 2.1558e-04
Loss = 3.2989e-01, PNorm = 62.5648, GNorm = 1.6235, lr_0 = 2.1543e-04
Loss = 3.6560e-01, PNorm = 62.5679, GNorm = 1.3736, lr_0 = 2.1528e-04
Loss = 3.5731e-01, PNorm = 62.5687, GNorm = 1.1225, lr_0 = 2.1513e-04
Loss = 3.3206e-01, PNorm = 62.5700, GNorm = 1.5811, lr_0 = 2.1499e-04
Loss = 3.5575e-01, PNorm = 62.5712, GNorm = 1.5231, lr_0 = 2.1484e-04
Loss = 3.6057e-01, PNorm = 62.5730, GNorm = 1.8699, lr_0 = 2.1469e-04
Loss = 3.4103e-01, PNorm = 62.5758, GNorm = 1.2042, lr_0 = 2.1454e-04
Loss = 4.8644e-01, PNorm = 62.5780, GNorm = 1.7405, lr_0 = 2.1440e-04
Loss = 3.9018e-01, PNorm = 62.5842, GNorm = 1.3344, lr_0 = 2.1425e-04
Loss = 3.9211e-01, PNorm = 62.5866, GNorm = 1.3578, lr_0 = 2.1410e-04
Loss = 3.8945e-01, PNorm = 62.5911, GNorm = 1.6759, lr_0 = 2.1396e-04
Loss = 3.4511e-01, PNorm = 62.5932, GNorm = 1.2589, lr_0 = 2.1381e-04
Loss = 3.5473e-01, PNorm = 62.5943, GNorm = 1.4180, lr_0 = 2.1366e-04
Loss = 3.6573e-01, PNorm = 62.5971, GNorm = 1.6968, lr_0 = 2.1352e-04
Loss = 4.4558e-01, PNorm = 62.6024, GNorm = 1.4894, lr_0 = 2.1337e-04
Loss = 3.1453e-01, PNorm = 62.6058, GNorm = 0.9995, lr_0 = 2.1323e-04
Loss = 4.0112e-01, PNorm = 62.6061, GNorm = 1.5053, lr_0 = 2.1308e-04
Loss = 3.9101e-01, PNorm = 62.6081, GNorm = 1.3530, lr_0 = 2.1293e-04
Loss = 3.7485e-01, PNorm = 62.6100, GNorm = 1.5625, lr_0 = 2.1279e-04
Loss = 3.5585e-01, PNorm = 62.6117, GNorm = 1.1577, lr_0 = 2.1264e-04
Loss = 3.1118e-01, PNorm = 62.6148, GNorm = 1.3863, lr_0 = 2.1250e-04
Loss = 3.8559e-01, PNorm = 62.6181, GNorm = 1.0368, lr_0 = 2.1235e-04
Loss = 3.6513e-01, PNorm = 62.6203, GNorm = 1.2971, lr_0 = 2.1221e-04
Loss = 3.4243e-01, PNorm = 62.6228, GNorm = 1.1939, lr_0 = 2.1206e-04
Loss = 3.8251e-01, PNorm = 62.6248, GNorm = 1.1867, lr_0 = 2.1191e-04
Loss = 3.4958e-01, PNorm = 62.6291, GNorm = 2.2015, lr_0 = 2.1177e-04
Loss = 3.0791e-01, PNorm = 62.6289, GNorm = 1.1138, lr_0 = 2.1162e-04
Loss = 3.3388e-01, PNorm = 62.6332, GNorm = 1.4669, lr_0 = 2.1148e-04
Loss = 3.5362e-01, PNorm = 62.6375, GNorm = 1.2269, lr_0 = 2.1133e-04
Loss = 3.7346e-01, PNorm = 62.6394, GNorm = 1.9170, lr_0 = 2.1119e-04
Loss = 3.5988e-01, PNorm = 62.6440, GNorm = 1.9460, lr_0 = 2.1104e-04
Loss = 3.8450e-01, PNorm = 62.6461, GNorm = 1.4370, lr_0 = 2.1090e-04
Loss = 3.8902e-01, PNorm = 62.6483, GNorm = 1.2508, lr_0 = 2.1076e-04
Loss = 3.7574e-01, PNorm = 62.6506, GNorm = 1.6419, lr_0 = 2.1061e-04
Loss = 3.9998e-01, PNorm = 62.6538, GNorm = 1.6984, lr_0 = 2.1047e-04
Loss = 3.6108e-01, PNorm = 62.6572, GNorm = 1.8558, lr_0 = 2.1032e-04
Loss = 3.5839e-01, PNorm = 62.6554, GNorm = 1.5575, lr_0 = 2.1018e-04
Loss = 3.8332e-01, PNorm = 62.6576, GNorm = 1.5061, lr_0 = 2.1003e-04
Loss = 3.3392e-01, PNorm = 62.6606, GNorm = 1.4829, lr_0 = 2.0989e-04
Loss = 4.1005e-01, PNorm = 62.6598, GNorm = 1.2260, lr_0 = 2.0975e-04
Loss = 3.6633e-01, PNorm = 62.6638, GNorm = 1.1425, lr_0 = 2.0960e-04
Validation mae = 0.111513
Epoch 21
Loss = 3.7743e-01, PNorm = 62.6676, GNorm = 1.0460, lr_0 = 2.0946e-04
Loss = 3.2414e-01, PNorm = 62.6685, GNorm = 1.2880, lr_0 = 2.0932e-04
Loss = 3.5483e-01, PNorm = 62.6707, GNorm = 1.2768, lr_0 = 2.0917e-04
Loss = 3.4821e-01, PNorm = 62.6713, GNorm = 1.2786, lr_0 = 2.0903e-04
Loss = 3.2734e-01, PNorm = 62.6721, GNorm = 1.8633, lr_0 = 2.0889e-04
Loss = 3.7257e-01, PNorm = 62.6748, GNorm = 2.1757, lr_0 = 2.0874e-04
Loss = 3.1206e-01, PNorm = 62.6763, GNorm = 1.6027, lr_0 = 2.0860e-04
Loss = 3.7332e-01, PNorm = 62.6762, GNorm = 1.1161, lr_0 = 2.0846e-04
Loss = 3.5936e-01, PNorm = 62.6801, GNorm = 1.3968, lr_0 = 2.0831e-04
Loss = 3.9980e-01, PNorm = 62.6828, GNorm = 1.4426, lr_0 = 2.0817e-04
Loss = 3.7389e-01, PNorm = 62.6826, GNorm = 1.2935, lr_0 = 2.0803e-04
Loss = 4.4003e-01, PNorm = 62.6880, GNorm = 1.0766, lr_0 = 2.0789e-04
Loss = 4.1327e-01, PNorm = 62.6929, GNorm = 1.4579, lr_0 = 2.0774e-04
Loss = 3.3923e-01, PNorm = 62.6969, GNorm = 1.3416, lr_0 = 2.0760e-04
Loss = 3.5391e-01, PNorm = 62.7020, GNorm = 1.4671, lr_0 = 2.0746e-04
Loss = 3.8672e-01, PNorm = 62.7046, GNorm = 1.7382, lr_0 = 2.0732e-04
Loss = 3.6122e-01, PNorm = 62.7091, GNorm = 1.5832, lr_0 = 2.0718e-04
Loss = 3.9247e-01, PNorm = 62.7104, GNorm = 1.7408, lr_0 = 2.0703e-04
Loss = 3.5929e-01, PNorm = 62.7113, GNorm = 1.4449, lr_0 = 2.0689e-04
Loss = 3.9884e-01, PNorm = 62.7146, GNorm = 1.3133, lr_0 = 2.0675e-04
Loss = 3.2903e-01, PNorm = 62.7159, GNorm = 1.1088, lr_0 = 2.0661e-04
Loss = 3.3203e-01, PNorm = 62.7160, GNorm = 1.6473, lr_0 = 2.0647e-04
Loss = 3.4344e-01, PNorm = 62.7172, GNorm = 1.3837, lr_0 = 2.0633e-04
Loss = 3.5221e-01, PNorm = 62.7206, GNorm = 2.3517, lr_0 = 2.0618e-04
Loss = 3.7018e-01, PNorm = 62.7225, GNorm = 1.1063, lr_0 = 2.0604e-04
Loss = 3.4848e-01, PNorm = 62.7243, GNorm = 1.1933, lr_0 = 2.0590e-04
Loss = 3.0832e-01, PNorm = 62.7256, GNorm = 1.6492, lr_0 = 2.0576e-04
Loss = 3.5622e-01, PNorm = 62.7272, GNorm = 1.6297, lr_0 = 2.0562e-04
Loss = 3.7351e-01, PNorm = 62.7285, GNorm = 1.3530, lr_0 = 2.0548e-04
Loss = 3.8388e-01, PNorm = 62.7321, GNorm = 1.7797, lr_0 = 2.0534e-04
Loss = 3.8688e-01, PNorm = 62.7346, GNorm = 1.9316, lr_0 = 2.0520e-04
Loss = 3.7370e-01, PNorm = 62.7368, GNorm = 1.3083, lr_0 = 2.0506e-04
Loss = 3.6113e-01, PNorm = 62.7392, GNorm = 1.7064, lr_0 = 2.0492e-04
Loss = 4.0663e-01, PNorm = 62.7406, GNorm = 1.4783, lr_0 = 2.0478e-04
Loss = 3.5527e-01, PNorm = 62.7420, GNorm = 1.3158, lr_0 = 2.0464e-04
Loss = 3.1653e-01, PNorm = 62.7440, GNorm = 1.2850, lr_0 = 2.0450e-04
Loss = 3.3192e-01, PNorm = 62.7462, GNorm = 1.3907, lr_0 = 2.0436e-04
Loss = 3.7134e-01, PNorm = 62.7482, GNorm = 2.3865, lr_0 = 2.0422e-04
Loss = 3.6347e-01, PNorm = 62.7499, GNorm = 1.5103, lr_0 = 2.0408e-04
Loss = 3.8714e-01, PNorm = 62.7537, GNorm = 1.3982, lr_0 = 2.0394e-04
Loss = 3.4938e-01, PNorm = 62.7557, GNorm = 2.2781, lr_0 = 2.0380e-04
Loss = 3.6016e-01, PNorm = 62.7570, GNorm = 1.3763, lr_0 = 2.0366e-04
Loss = 3.6324e-01, PNorm = 62.7581, GNorm = 1.6417, lr_0 = 2.0352e-04
Loss = 3.0065e-01, PNorm = 62.7610, GNorm = 1.6303, lr_0 = 2.0338e-04
Loss = 4.3403e-01, PNorm = 62.7644, GNorm = 2.5071, lr_0 = 2.0324e-04
Loss = 3.6596e-01, PNorm = 62.7681, GNorm = 1.4919, lr_0 = 2.0310e-04
Loss = 3.3076e-01, PNorm = 62.7696, GNorm = 1.4256, lr_0 = 2.0296e-04
Loss = 3.6375e-01, PNorm = 62.7693, GNorm = 1.2844, lr_0 = 2.0282e-04
Loss = 3.4156e-01, PNorm = 62.7727, GNorm = 1.6869, lr_0 = 2.0268e-04
Loss = 3.8812e-01, PNorm = 62.7758, GNorm = 1.2772, lr_0 = 2.0254e-04
Loss = 3.6310e-01, PNorm = 62.7777, GNorm = 1.6401, lr_0 = 2.0240e-04
Loss = 3.0722e-01, PNorm = 62.7778, GNorm = 1.0742, lr_0 = 2.0227e-04
Loss = 3.6091e-01, PNorm = 62.7790, GNorm = 1.8686, lr_0 = 2.0213e-04
Loss = 3.3862e-01, PNorm = 62.7805, GNorm = 1.4092, lr_0 = 2.0199e-04
Loss = 3.7193e-01, PNorm = 62.7810, GNorm = 2.2982, lr_0 = 2.0185e-04
Loss = 3.6502e-01, PNorm = 62.7824, GNorm = 1.9048, lr_0 = 2.0171e-04
Loss = 3.6613e-01, PNorm = 62.7872, GNorm = 1.7463, lr_0 = 2.0157e-04
Loss = 3.8571e-01, PNorm = 62.7901, GNorm = 1.4009, lr_0 = 2.0144e-04
Loss = 3.6013e-01, PNorm = 62.7903, GNorm = 1.8704, lr_0 = 2.0130e-04
Loss = 3.6084e-01, PNorm = 62.7930, GNorm = 1.3077, lr_0 = 2.0116e-04
Loss = 3.8991e-01, PNorm = 62.7951, GNorm = 1.5645, lr_0 = 2.0102e-04
Loss = 3.6417e-01, PNorm = 62.7986, GNorm = 1.5526, lr_0 = 2.0088e-04
Loss = 3.3059e-01, PNorm = 62.8004, GNorm = 1.4956, lr_0 = 2.0075e-04
Loss = 3.3722e-01, PNorm = 62.8001, GNorm = 1.2344, lr_0 = 2.0061e-04
Loss = 3.5912e-01, PNorm = 62.8027, GNorm = 1.0822, lr_0 = 2.0047e-04
Loss = 3.2824e-01, PNorm = 62.8060, GNorm = 1.4569, lr_0 = 2.0033e-04
Loss = 3.9394e-01, PNorm = 62.8090, GNorm = 0.9511, lr_0 = 2.0020e-04
Loss = 3.6134e-01, PNorm = 62.8113, GNorm = 1.2718, lr_0 = 2.0006e-04
Loss = 4.2872e-01, PNorm = 62.8135, GNorm = 1.1597, lr_0 = 1.9992e-04
Loss = 3.7958e-01, PNorm = 62.8146, GNorm = 1.3859, lr_0 = 1.9979e-04
Loss = 3.4080e-01, PNorm = 62.8185, GNorm = 1.4841, lr_0 = 1.9965e-04
Loss = 3.3771e-01, PNorm = 62.8223, GNorm = 1.3661, lr_0 = 1.9951e-04
Loss = 3.5774e-01, PNorm = 62.8213, GNorm = 1.5183, lr_0 = 1.9938e-04
Loss = 4.0774e-01, PNorm = 62.8221, GNorm = 1.2548, lr_0 = 1.9924e-04
Loss = 3.5036e-01, PNorm = 62.8257, GNorm = 1.2582, lr_0 = 1.9910e-04
Loss = 3.1304e-01, PNorm = 62.8286, GNorm = 1.5171, lr_0 = 1.9897e-04
Loss = 3.1187e-01, PNorm = 62.8312, GNorm = 1.0957, lr_0 = 1.9883e-04
Loss = 3.1868e-01, PNorm = 62.8334, GNorm = 1.2562, lr_0 = 1.9869e-04
Loss = 3.1342e-01, PNorm = 62.8363, GNorm = 1.1901, lr_0 = 1.9856e-04
Loss = 3.9224e-01, PNorm = 62.8373, GNorm = 1.9386, lr_0 = 1.9842e-04
Loss = 4.3745e-01, PNorm = 62.8400, GNorm = 1.8610, lr_0 = 1.9829e-04
Loss = 4.1995e-01, PNorm = 62.8417, GNorm = 1.3273, lr_0 = 1.9815e-04
Loss = 4.1698e-01, PNorm = 62.8449, GNorm = 1.1264, lr_0 = 1.9801e-04
Loss = 3.8351e-01, PNorm = 62.8467, GNorm = 1.2726, lr_0 = 1.9788e-04
Loss = 3.3093e-01, PNorm = 62.8522, GNorm = 1.5214, lr_0 = 1.9774e-04
Loss = 4.1246e-01, PNorm = 62.8560, GNorm = 1.6122, lr_0 = 1.9761e-04
Loss = 3.4346e-01, PNorm = 62.8601, GNorm = 1.5289, lr_0 = 1.9747e-04
Loss = 3.6578e-01, PNorm = 62.8595, GNorm = 1.1375, lr_0 = 1.9734e-04
Loss = 3.8692e-01, PNorm = 62.8589, GNorm = 1.7730, lr_0 = 1.9720e-04
Loss = 3.1974e-01, PNorm = 62.8616, GNorm = 1.6022, lr_0 = 1.9707e-04
Loss = 3.3566e-01, PNorm = 62.8644, GNorm = 1.0284, lr_0 = 1.9693e-04
Loss = 3.6344e-01, PNorm = 62.8660, GNorm = 1.4203, lr_0 = 1.9680e-04
Loss = 3.4771e-01, PNorm = 62.8686, GNorm = 1.8186, lr_0 = 1.9666e-04
Loss = 3.5890e-01, PNorm = 62.8707, GNorm = 1.7050, lr_0 = 1.9653e-04
Loss = 3.7106e-01, PNorm = 62.8717, GNorm = 1.3362, lr_0 = 1.9639e-04
Loss = 3.3243e-01, PNorm = 62.8719, GNorm = 1.0572, lr_0 = 1.9626e-04
Loss = 3.3373e-01, PNorm = 62.8741, GNorm = 1.2988, lr_0 = 1.9612e-04
Loss = 3.3681e-01, PNorm = 62.8743, GNorm = 1.5828, lr_0 = 1.9599e-04
Loss = 3.6779e-01, PNorm = 62.8749, GNorm = 1.4395, lr_0 = 1.9585e-04
Loss = 3.1949e-01, PNorm = 62.8791, GNorm = 1.3470, lr_0 = 1.9572e-04
Loss = 3.8008e-01, PNorm = 62.8799, GNorm = 1.1169, lr_0 = 1.9559e-04
Loss = 3.7708e-01, PNorm = 62.8826, GNorm = 1.3827, lr_0 = 1.9545e-04
Loss = 3.2817e-01, PNorm = 62.8863, GNorm = 1.2639, lr_0 = 1.9532e-04
Loss = 3.4839e-01, PNorm = 62.8891, GNorm = 1.0789, lr_0 = 1.9518e-04
Loss = 3.9137e-01, PNorm = 62.8902, GNorm = 1.7386, lr_0 = 1.9505e-04
Loss = 3.5640e-01, PNorm = 62.8915, GNorm = 1.7644, lr_0 = 1.9492e-04
Loss = 3.5688e-01, PNorm = 62.8963, GNorm = 1.4488, lr_0 = 1.9478e-04
Loss = 3.3766e-01, PNorm = 62.8980, GNorm = 1.3006, lr_0 = 1.9465e-04
Loss = 4.0632e-01, PNorm = 62.9007, GNorm = 1.5773, lr_0 = 1.9452e-04
Loss = 3.7362e-01, PNorm = 62.9058, GNorm = 1.4171, lr_0 = 1.9438e-04
Loss = 3.4482e-01, PNorm = 62.9100, GNorm = 1.4362, lr_0 = 1.9425e-04
Loss = 3.7294e-01, PNorm = 62.9128, GNorm = 1.2758, lr_0 = 1.9412e-04
Loss = 3.5925e-01, PNorm = 62.9126, GNorm = 1.4880, lr_0 = 1.9398e-04
Loss = 3.1677e-01, PNorm = 62.9135, GNorm = 1.2192, lr_0 = 1.9385e-04
Loss = 3.3757e-01, PNorm = 62.9165, GNorm = 1.4133, lr_0 = 1.9372e-04
Loss = 3.2745e-01, PNorm = 62.9176, GNorm = 1.1389, lr_0 = 1.9359e-04
Loss = 4.3931e-01, PNorm = 62.9181, GNorm = 1.2893, lr_0 = 1.9345e-04
Loss = 3.4430e-01, PNorm = 62.9199, GNorm = 1.5380, lr_0 = 1.9332e-04
Loss = 3.4009e-01, PNorm = 62.9217, GNorm = 1.7016, lr_0 = 1.9319e-04
Loss = 3.8298e-01, PNorm = 62.9240, GNorm = 2.2187, lr_0 = 1.9306e-04
Validation mae = 0.112232
Epoch 22
Loss = 3.8091e-01, PNorm = 62.9267, GNorm = 1.8245, lr_0 = 1.9292e-04
Loss = 3.1814e-01, PNorm = 62.9284, GNorm = 2.0813, lr_0 = 1.9279e-04
Loss = 3.3158e-01, PNorm = 62.9316, GNorm = 1.2474, lr_0 = 1.9266e-04
Loss = 3.3059e-01, PNorm = 62.9326, GNorm = 1.4136, lr_0 = 1.9253e-04
Loss = 3.2942e-01, PNorm = 62.9340, GNorm = 2.1028, lr_0 = 1.9240e-04
Loss = 3.5278e-01, PNorm = 62.9372, GNorm = 1.4380, lr_0 = 1.9226e-04
Loss = 4.0343e-01, PNorm = 62.9375, GNorm = 1.1845, lr_0 = 1.9213e-04
Loss = 3.4055e-01, PNorm = 62.9390, GNorm = 1.0031, lr_0 = 1.9200e-04
Loss = 3.8035e-01, PNorm = 62.9411, GNorm = 1.2712, lr_0 = 1.9187e-04
Loss = 3.8921e-01, PNorm = 62.9415, GNorm = 1.8754, lr_0 = 1.9174e-04
Loss = 3.6690e-01, PNorm = 62.9417, GNorm = 1.6503, lr_0 = 1.9161e-04
Loss = 3.7862e-01, PNorm = 62.9433, GNorm = 1.4958, lr_0 = 1.9148e-04
Loss = 3.7234e-01, PNorm = 62.9445, GNorm = 1.1650, lr_0 = 1.9134e-04
Loss = 3.3181e-01, PNorm = 62.9456, GNorm = 1.0058, lr_0 = 1.9121e-04
Loss = 3.6579e-01, PNorm = 62.9484, GNorm = 1.7368, lr_0 = 1.9108e-04
Loss = 2.7098e-01, PNorm = 62.9530, GNorm = 1.0035, lr_0 = 1.9095e-04
Loss = 3.7632e-01, PNorm = 62.9540, GNorm = 1.1629, lr_0 = 1.9082e-04
Loss = 3.5095e-01, PNorm = 62.9576, GNorm = 1.5450, lr_0 = 1.9069e-04
Loss = 3.4537e-01, PNorm = 62.9591, GNorm = 1.4019, lr_0 = 1.9056e-04
Loss = 3.4657e-01, PNorm = 62.9582, GNorm = 1.7206, lr_0 = 1.9043e-04
Loss = 3.4527e-01, PNorm = 62.9577, GNorm = 1.4005, lr_0 = 1.9030e-04
Loss = 3.7049e-01, PNorm = 62.9585, GNorm = 1.3927, lr_0 = 1.9017e-04
Loss = 3.5534e-01, PNorm = 62.9600, GNorm = 0.9085, lr_0 = 1.9004e-04
Loss = 3.5642e-01, PNorm = 62.9616, GNorm = 1.4709, lr_0 = 1.8991e-04
Loss = 3.7748e-01, PNorm = 62.9645, GNorm = 1.4425, lr_0 = 1.8978e-04
Loss = 3.5019e-01, PNorm = 62.9630, GNorm = 1.5087, lr_0 = 1.8965e-04
Loss = 3.8832e-01, PNorm = 62.9633, GNorm = 2.0854, lr_0 = 1.8952e-04
Loss = 3.4225e-01, PNorm = 62.9649, GNorm = 1.7081, lr_0 = 1.8939e-04
Loss = 2.9306e-01, PNorm = 62.9661, GNorm = 0.9202, lr_0 = 1.8926e-04
Loss = 3.8066e-01, PNorm = 62.9702, GNorm = 1.1948, lr_0 = 1.8913e-04
Loss = 3.6538e-01, PNorm = 62.9723, GNorm = 1.5790, lr_0 = 1.8900e-04
Loss = 3.5512e-01, PNorm = 62.9734, GNorm = 1.4483, lr_0 = 1.8887e-04
Loss = 2.8461e-01, PNorm = 62.9791, GNorm = 1.3779, lr_0 = 1.8874e-04
Loss = 3.8027e-01, PNorm = 62.9775, GNorm = 1.0073, lr_0 = 1.8861e-04
Loss = 3.7722e-01, PNorm = 62.9768, GNorm = 1.7375, lr_0 = 1.8848e-04
Loss = 3.7549e-01, PNorm = 62.9803, GNorm = 1.7263, lr_0 = 1.8835e-04
Loss = 3.4245e-01, PNorm = 62.9815, GNorm = 1.7029, lr_0 = 1.8822e-04
Loss = 3.4520e-01, PNorm = 62.9844, GNorm = 1.2411, lr_0 = 1.8809e-04
Loss = 4.2881e-01, PNorm = 62.9873, GNorm = 1.3118, lr_0 = 1.8797e-04
Loss = 3.7474e-01, PNorm = 62.9892, GNorm = 1.3224, lr_0 = 1.8784e-04
Loss = 3.6134e-01, PNorm = 62.9900, GNorm = 1.6731, lr_0 = 1.8771e-04
Loss = 3.5576e-01, PNorm = 62.9919, GNorm = 1.5254, lr_0 = 1.8758e-04
Loss = 3.9871e-01, PNorm = 62.9929, GNorm = 2.2971, lr_0 = 1.8745e-04
Loss = 3.1401e-01, PNorm = 62.9932, GNorm = 1.4904, lr_0 = 1.8732e-04
Loss = 3.4136e-01, PNorm = 62.9934, GNorm = 1.3754, lr_0 = 1.8719e-04
Loss = 3.6172e-01, PNorm = 62.9942, GNorm = 1.5390, lr_0 = 1.8707e-04
Loss = 3.3976e-01, PNorm = 62.9971, GNorm = 1.3651, lr_0 = 1.8694e-04
Loss = 3.5562e-01, PNorm = 63.0013, GNorm = 2.2901, lr_0 = 1.8681e-04
Loss = 3.8231e-01, PNorm = 63.0021, GNorm = 1.2926, lr_0 = 1.8668e-04
Loss = 3.7155e-01, PNorm = 63.0041, GNorm = 1.7341, lr_0 = 1.8655e-04
Loss = 3.4616e-01, PNorm = 63.0088, GNorm = 1.6550, lr_0 = 1.8643e-04
Loss = 3.4075e-01, PNorm = 63.0107, GNorm = 1.4677, lr_0 = 1.8630e-04
Loss = 3.3880e-01, PNorm = 63.0129, GNorm = 1.0721, lr_0 = 1.8617e-04
Loss = 3.6769e-01, PNorm = 63.0155, GNorm = 1.5860, lr_0 = 1.8604e-04
Loss = 3.6755e-01, PNorm = 63.0176, GNorm = 1.3015, lr_0 = 1.8592e-04
Loss = 3.5294e-01, PNorm = 63.0186, GNorm = 1.6162, lr_0 = 1.8579e-04
Loss = 3.9532e-01, PNorm = 63.0203, GNorm = 1.4272, lr_0 = 1.8566e-04
Loss = 3.8973e-01, PNorm = 63.0226, GNorm = 1.4986, lr_0 = 1.8553e-04
Loss = 3.2973e-01, PNorm = 63.0246, GNorm = 1.9780, lr_0 = 1.8541e-04
Loss = 3.3057e-01, PNorm = 63.0275, GNorm = 1.1753, lr_0 = 1.8528e-04
Loss = 3.4802e-01, PNorm = 63.0300, GNorm = 1.6329, lr_0 = 1.8515e-04
Loss = 3.1668e-01, PNorm = 63.0340, GNorm = 1.0838, lr_0 = 1.8503e-04
Loss = 3.4267e-01, PNorm = 63.0332, GNorm = 1.3995, lr_0 = 1.8490e-04
Loss = 3.9828e-01, PNorm = 63.0334, GNorm = 1.9714, lr_0 = 1.8477e-04
Loss = 3.7972e-01, PNorm = 63.0373, GNorm = 1.1540, lr_0 = 1.8465e-04
Loss = 3.5394e-01, PNorm = 63.0415, GNorm = 1.3266, lr_0 = 1.8452e-04
Loss = 3.5541e-01, PNorm = 63.0422, GNorm = 1.6265, lr_0 = 1.8439e-04
Loss = 3.3825e-01, PNorm = 63.0448, GNorm = 1.6197, lr_0 = 1.8427e-04
Loss = 3.1513e-01, PNorm = 63.0468, GNorm = 1.6739, lr_0 = 1.8414e-04
Loss = 3.4285e-01, PNorm = 63.0491, GNorm = 1.3248, lr_0 = 1.8401e-04
Loss = 3.0589e-01, PNorm = 63.0519, GNorm = 1.4082, lr_0 = 1.8389e-04
Loss = 4.1984e-01, PNorm = 63.0522, GNorm = 1.7571, lr_0 = 1.8376e-04
Loss = 3.8702e-01, PNorm = 63.0530, GNorm = 1.1648, lr_0 = 1.8364e-04
Loss = 4.4898e-01, PNorm = 63.0538, GNorm = 1.5646, lr_0 = 1.8351e-04
Loss = 3.3611e-01, PNorm = 63.0571, GNorm = 1.5442, lr_0 = 1.8338e-04
Loss = 3.3399e-01, PNorm = 63.0624, GNorm = 1.6006, lr_0 = 1.8326e-04
Loss = 3.2024e-01, PNorm = 63.0608, GNorm = 1.3388, lr_0 = 1.8313e-04
Loss = 3.1743e-01, PNorm = 63.0594, GNorm = 0.9689, lr_0 = 1.8301e-04
Loss = 3.6239e-01, PNorm = 63.0616, GNorm = 1.6970, lr_0 = 1.8288e-04
Loss = 3.4119e-01, PNorm = 63.0650, GNorm = 1.5475, lr_0 = 1.8276e-04
Loss = 3.2060e-01, PNorm = 63.0665, GNorm = 1.2168, lr_0 = 1.8263e-04
Loss = 3.7696e-01, PNorm = 63.0677, GNorm = 1.8684, lr_0 = 1.8251e-04
Loss = 3.8242e-01, PNorm = 63.0666, GNorm = 1.1871, lr_0 = 1.8238e-04
Loss = 3.3409e-01, PNorm = 63.0674, GNorm = 1.5819, lr_0 = 1.8226e-04
Loss = 3.8041e-01, PNorm = 63.0688, GNorm = 1.4932, lr_0 = 1.8213e-04
Loss = 3.2237e-01, PNorm = 63.0717, GNorm = 1.3025, lr_0 = 1.8201e-04
Loss = 3.8234e-01, PNorm = 63.0723, GNorm = 1.3632, lr_0 = 1.8188e-04
Loss = 3.9528e-01, PNorm = 63.0747, GNorm = 1.9160, lr_0 = 1.8176e-04
Loss = 3.2487e-01, PNorm = 63.0775, GNorm = 1.3591, lr_0 = 1.8163e-04
Loss = 3.3979e-01, PNorm = 63.0795, GNorm = 1.1502, lr_0 = 1.8151e-04
Loss = 3.5037e-01, PNorm = 63.0795, GNorm = 1.2213, lr_0 = 1.8138e-04
Loss = 3.6281e-01, PNorm = 63.0811, GNorm = 1.4233, lr_0 = 1.8126e-04
Loss = 3.3102e-01, PNorm = 63.0848, GNorm = 1.5925, lr_0 = 1.8114e-04
Loss = 3.6953e-01, PNorm = 63.0855, GNorm = 1.8121, lr_0 = 1.8101e-04
Loss = 3.3933e-01, PNorm = 63.0876, GNorm = 1.6514, lr_0 = 1.8089e-04
Loss = 4.3299e-01, PNorm = 63.0922, GNorm = 1.8841, lr_0 = 1.8076e-04
Loss = 2.9567e-01, PNorm = 63.0970, GNorm = 0.9860, lr_0 = 1.8064e-04
Loss = 3.7699e-01, PNorm = 63.0951, GNorm = 2.0663, lr_0 = 1.8052e-04
Loss = 3.7185e-01, PNorm = 63.0970, GNorm = 1.4208, lr_0 = 1.8039e-04
Loss = 3.3415e-01, PNorm = 63.1000, GNorm = 1.1796, lr_0 = 1.8027e-04
Loss = 3.2638e-01, PNorm = 63.1011, GNorm = 1.2476, lr_0 = 1.8015e-04
Loss = 3.7975e-01, PNorm = 63.1034, GNorm = 1.6116, lr_0 = 1.8002e-04
Loss = 3.5478e-01, PNorm = 63.1075, GNorm = 1.1582, lr_0 = 1.7990e-04
Loss = 3.1633e-01, PNorm = 63.1085, GNorm = 1.2740, lr_0 = 1.7978e-04
Loss = 3.7710e-01, PNorm = 63.1109, GNorm = 1.2286, lr_0 = 1.7965e-04
Loss = 3.6207e-01, PNorm = 63.1131, GNorm = 1.6524, lr_0 = 1.7953e-04
Loss = 3.6732e-01, PNorm = 63.1137, GNorm = 1.2692, lr_0 = 1.7941e-04
Loss = 3.7609e-01, PNorm = 63.1139, GNorm = 1.4426, lr_0 = 1.7928e-04
Loss = 3.7842e-01, PNorm = 63.1165, GNorm = 1.5769, lr_0 = 1.7916e-04
Loss = 3.6968e-01, PNorm = 63.1186, GNorm = 1.3052, lr_0 = 1.7904e-04
Loss = 3.9942e-01, PNorm = 63.1200, GNorm = 1.8043, lr_0 = 1.7892e-04
Loss = 3.2055e-01, PNorm = 63.1211, GNorm = 1.5876, lr_0 = 1.7879e-04
Loss = 3.3384e-01, PNorm = 63.1239, GNorm = 1.6341, lr_0 = 1.7867e-04
Loss = 3.7945e-01, PNorm = 63.1238, GNorm = 1.3427, lr_0 = 1.7855e-04
Loss = 3.3877e-01, PNorm = 63.1243, GNorm = 1.1083, lr_0 = 1.7843e-04
Loss = 3.3791e-01, PNorm = 63.1262, GNorm = 1.5576, lr_0 = 1.7830e-04
Loss = 3.7206e-01, PNorm = 63.1292, GNorm = 1.6158, lr_0 = 1.7818e-04
Loss = 3.7636e-01, PNorm = 63.1333, GNorm = 1.7962, lr_0 = 1.7806e-04
Loss = 3.6040e-01, PNorm = 63.1367, GNorm = 0.9257, lr_0 = 1.7794e-04
Loss = 3.6716e-01, PNorm = 63.1390, GNorm = 1.4184, lr_0 = 1.7782e-04
Validation mae = 0.111202
Epoch 23
Loss = 3.2926e-01, PNorm = 63.1410, GNorm = 1.4133, lr_0 = 1.7769e-04
Loss = 3.0869e-01, PNorm = 63.1416, GNorm = 1.0151, lr_0 = 1.7757e-04
Loss = 3.5261e-01, PNorm = 63.1419, GNorm = 1.6222, lr_0 = 1.7745e-04
Loss = 3.6771e-01, PNorm = 63.1429, GNorm = 1.9375, lr_0 = 1.7733e-04
Loss = 3.0027e-01, PNorm = 63.1439, GNorm = 1.8656, lr_0 = 1.7721e-04
Loss = 3.4135e-01, PNorm = 63.1469, GNorm = 1.2096, lr_0 = 1.7709e-04
Loss = 3.6533e-01, PNorm = 63.1493, GNorm = 1.4803, lr_0 = 1.7696e-04
Loss = 3.4121e-01, PNorm = 63.1516, GNorm = 1.5402, lr_0 = 1.7684e-04
Loss = 3.4280e-01, PNorm = 63.1541, GNorm = 1.5833, lr_0 = 1.7672e-04
Loss = 4.2358e-01, PNorm = 63.1550, GNorm = 1.5895, lr_0 = 1.7660e-04
Loss = 3.3916e-01, PNorm = 63.1570, GNorm = 1.6939, lr_0 = 1.7648e-04
Loss = 3.5599e-01, PNorm = 63.1579, GNorm = 1.2911, lr_0 = 1.7636e-04
Loss = 3.2577e-01, PNorm = 63.1586, GNorm = 1.1170, lr_0 = 1.7624e-04
Loss = 3.2545e-01, PNorm = 63.1605, GNorm = 1.2346, lr_0 = 1.7612e-04
Loss = 4.0245e-01, PNorm = 63.1649, GNorm = 3.5222, lr_0 = 1.7600e-04
Loss = 3.6323e-01, PNorm = 63.1686, GNorm = 1.4364, lr_0 = 1.7588e-04
Loss = 3.7861e-01, PNorm = 63.1699, GNorm = 1.2171, lr_0 = 1.7576e-04
Loss = 3.5526e-01, PNorm = 63.1708, GNorm = 1.6734, lr_0 = 1.7564e-04
Loss = 3.6818e-01, PNorm = 63.1720, GNorm = 1.5981, lr_0 = 1.7552e-04
Loss = 3.0923e-01, PNorm = 63.1750, GNorm = 1.4441, lr_0 = 1.7540e-04
Loss = 2.9912e-01, PNorm = 63.1767, GNorm = 1.5178, lr_0 = 1.7528e-04
Loss = 3.9585e-01, PNorm = 63.1793, GNorm = 1.4964, lr_0 = 1.7516e-04
Loss = 3.1671e-01, PNorm = 63.1802, GNorm = 1.7445, lr_0 = 1.7504e-04
Loss = 3.5018e-01, PNorm = 63.1801, GNorm = 1.4158, lr_0 = 1.7492e-04
Loss = 3.2738e-01, PNorm = 63.1812, GNorm = 1.6782, lr_0 = 1.7480e-04
Loss = 3.0458e-01, PNorm = 63.1814, GNorm = 1.3998, lr_0 = 1.7468e-04
Loss = 3.5443e-01, PNorm = 63.1829, GNorm = 1.3870, lr_0 = 1.7456e-04
Loss = 3.5086e-01, PNorm = 63.1834, GNorm = 1.5460, lr_0 = 1.7444e-04
Loss = 3.8665e-01, PNorm = 63.1825, GNorm = 1.7000, lr_0 = 1.7432e-04
Loss = 3.6177e-01, PNorm = 63.1864, GNorm = 1.6836, lr_0 = 1.7420e-04
Loss = 3.4702e-01, PNorm = 63.1908, GNorm = 1.3206, lr_0 = 1.7408e-04
Loss = 2.9473e-01, PNorm = 63.1921, GNorm = 1.9820, lr_0 = 1.7396e-04
Loss = 3.5141e-01, PNorm = 63.1944, GNorm = 1.4796, lr_0 = 1.7384e-04
Loss = 3.5531e-01, PNorm = 63.1962, GNorm = 1.2039, lr_0 = 1.7372e-04
Loss = 3.6461e-01, PNorm = 63.1972, GNorm = 1.3295, lr_0 = 1.7360e-04
Loss = 3.5128e-01, PNorm = 63.1982, GNorm = 1.7502, lr_0 = 1.7348e-04
Loss = 3.7060e-01, PNorm = 63.2029, GNorm = 1.0580, lr_0 = 1.7336e-04
Loss = 3.5369e-01, PNorm = 63.2039, GNorm = 1.6217, lr_0 = 1.7325e-04
Loss = 3.9104e-01, PNorm = 63.2056, GNorm = 1.8432, lr_0 = 1.7313e-04
Loss = 3.2060e-01, PNorm = 63.2061, GNorm = 1.6968, lr_0 = 1.7301e-04
Loss = 3.5080e-01, PNorm = 63.2037, GNorm = 1.7899, lr_0 = 1.7289e-04
Loss = 4.2024e-01, PNorm = 63.2048, GNorm = 1.7163, lr_0 = 1.7277e-04
Loss = 3.1391e-01, PNorm = 63.2057, GNorm = 1.2377, lr_0 = 1.7265e-04
Loss = 3.5674e-01, PNorm = 63.2068, GNorm = 1.7004, lr_0 = 1.7253e-04
Loss = 3.6513e-01, PNorm = 63.2076, GNorm = 1.7249, lr_0 = 1.7242e-04
Loss = 3.2029e-01, PNorm = 63.2122, GNorm = 1.5736, lr_0 = 1.7230e-04
Loss = 3.4441e-01, PNorm = 63.2121, GNorm = 1.2018, lr_0 = 1.7218e-04
Loss = 3.2287e-01, PNorm = 63.2124, GNorm = 1.4468, lr_0 = 1.7206e-04
Loss = 3.2975e-01, PNorm = 63.2138, GNorm = 1.1547, lr_0 = 1.7194e-04
Loss = 3.2693e-01, PNorm = 63.2155, GNorm = 1.2334, lr_0 = 1.7183e-04
Loss = 4.5150e-01, PNorm = 63.2161, GNorm = 1.6383, lr_0 = 1.7171e-04
Loss = 4.0495e-01, PNorm = 63.2163, GNorm = 1.6334, lr_0 = 1.7159e-04
Loss = 3.8238e-01, PNorm = 63.2203, GNorm = 2.1642, lr_0 = 1.7147e-04
Loss = 3.7680e-01, PNorm = 63.2231, GNorm = 1.1594, lr_0 = 1.7136e-04
Loss = 3.2324e-01, PNorm = 63.2238, GNorm = 1.1868, lr_0 = 1.7124e-04
Loss = 3.8089e-01, PNorm = 63.2230, GNorm = 1.7451, lr_0 = 1.7112e-04
Loss = 4.3600e-01, PNorm = 63.2251, GNorm = 1.4038, lr_0 = 1.7100e-04
Loss = 4.0008e-01, PNorm = 63.2274, GNorm = 1.2354, lr_0 = 1.7089e-04
Loss = 4.3394e-01, PNorm = 63.2295, GNorm = 1.1602, lr_0 = 1.7077e-04
Loss = 3.6215e-01, PNorm = 63.2314, GNorm = 2.6548, lr_0 = 1.7065e-04
Loss = 3.5338e-01, PNorm = 63.2309, GNorm = 1.9444, lr_0 = 1.7054e-04
Loss = 3.6130e-01, PNorm = 63.2325, GNorm = 1.0487, lr_0 = 1.7042e-04
Loss = 3.8430e-01, PNorm = 63.2340, GNorm = 1.6753, lr_0 = 1.7030e-04
Loss = 3.8381e-01, PNorm = 63.2355, GNorm = 1.3613, lr_0 = 1.7019e-04
Loss = 3.6173e-01, PNorm = 63.2364, GNorm = 1.3680, lr_0 = 1.7007e-04
Loss = 3.8565e-01, PNorm = 63.2383, GNorm = 1.6461, lr_0 = 1.6995e-04
Loss = 3.9353e-01, PNorm = 63.2414, GNorm = 2.1992, lr_0 = 1.6984e-04
Loss = 3.1967e-01, PNorm = 63.2430, GNorm = 1.6268, lr_0 = 1.6972e-04
Loss = 3.8100e-01, PNorm = 63.2436, GNorm = 1.3073, lr_0 = 1.6960e-04
Loss = 3.6542e-01, PNorm = 63.2432, GNorm = 1.3149, lr_0 = 1.6949e-04
Loss = 4.0257e-01, PNorm = 63.2423, GNorm = 1.7258, lr_0 = 1.6937e-04
Loss = 3.2807e-01, PNorm = 63.2431, GNorm = 1.2525, lr_0 = 1.6926e-04
Loss = 3.5098e-01, PNorm = 63.2453, GNorm = 1.2529, lr_0 = 1.6914e-04
Loss = 3.4845e-01, PNorm = 63.2475, GNorm = 1.8079, lr_0 = 1.6902e-04
Loss = 3.4786e-01, PNorm = 63.2490, GNorm = 1.3632, lr_0 = 1.6891e-04
Loss = 3.5722e-01, PNorm = 63.2495, GNorm = 1.3335, lr_0 = 1.6879e-04
Loss = 3.5666e-01, PNorm = 63.2509, GNorm = 1.3328, lr_0 = 1.6868e-04
Loss = 3.4203e-01, PNorm = 63.2556, GNorm = 1.5000, lr_0 = 1.6856e-04
Loss = 3.3252e-01, PNorm = 63.2593, GNorm = 1.6438, lr_0 = 1.6845e-04
Loss = 3.6258e-01, PNorm = 63.2601, GNorm = 2.0363, lr_0 = 1.6833e-04
Loss = 3.6792e-01, PNorm = 63.2638, GNorm = 1.6418, lr_0 = 1.6821e-04
Loss = 3.4945e-01, PNorm = 63.2659, GNorm = 1.2227, lr_0 = 1.6810e-04
Loss = 3.4759e-01, PNorm = 63.2677, GNorm = 1.0684, lr_0 = 1.6798e-04
Loss = 3.7245e-01, PNorm = 63.2690, GNorm = 1.3781, lr_0 = 1.6787e-04
Loss = 3.5191e-01, PNorm = 63.2706, GNorm = 1.4133, lr_0 = 1.6775e-04
Loss = 3.1060e-01, PNorm = 63.2746, GNorm = 1.9730, lr_0 = 1.6764e-04
Loss = 3.1297e-01, PNorm = 63.2751, GNorm = 1.5137, lr_0 = 1.6752e-04
Loss = 2.9968e-01, PNorm = 63.2751, GNorm = 1.2842, lr_0 = 1.6741e-04
Loss = 3.1662e-01, PNorm = 63.2761, GNorm = 1.2817, lr_0 = 1.6729e-04
Loss = 3.2610e-01, PNorm = 63.2773, GNorm = 1.6749, lr_0 = 1.6718e-04
Loss = 3.2069e-01, PNorm = 63.2777, GNorm = 1.3616, lr_0 = 1.6707e-04
Loss = 4.0730e-01, PNorm = 63.2812, GNorm = 1.4069, lr_0 = 1.6695e-04
Loss = 4.4313e-01, PNorm = 63.2830, GNorm = 1.3248, lr_0 = 1.6684e-04
Loss = 3.5401e-01, PNorm = 63.2857, GNorm = 1.4872, lr_0 = 1.6672e-04
Loss = 4.0128e-01, PNorm = 63.2877, GNorm = 1.6504, lr_0 = 1.6661e-04
Loss = 3.2129e-01, PNorm = 63.2898, GNorm = 1.4362, lr_0 = 1.6649e-04
Loss = 3.1568e-01, PNorm = 63.2906, GNorm = 1.1089, lr_0 = 1.6638e-04
Loss = 3.6185e-01, PNorm = 63.2916, GNorm = 1.5714, lr_0 = 1.6627e-04
Loss = 3.2737e-01, PNorm = 63.2926, GNorm = 1.3724, lr_0 = 1.6615e-04
Loss = 4.3082e-01, PNorm = 63.2932, GNorm = 1.7552, lr_0 = 1.6604e-04
Loss = 4.0609e-01, PNorm = 63.2950, GNorm = 1.8414, lr_0 = 1.6592e-04
Loss = 3.7903e-01, PNorm = 63.2953, GNorm = 1.2617, lr_0 = 1.6581e-04
Loss = 3.3994e-01, PNorm = 63.2980, GNorm = 1.6864, lr_0 = 1.6570e-04
Loss = 3.6060e-01, PNorm = 63.3010, GNorm = 1.3183, lr_0 = 1.6558e-04
Loss = 3.4456e-01, PNorm = 63.3021, GNorm = 1.3341, lr_0 = 1.6547e-04
Loss = 3.7377e-01, PNorm = 63.3026, GNorm = 1.3528, lr_0 = 1.6536e-04
Loss = 3.3768e-01, PNorm = 63.3055, GNorm = 1.7978, lr_0 = 1.6524e-04
Loss = 4.1011e-01, PNorm = 63.3067, GNorm = 1.1822, lr_0 = 1.6513e-04
Loss = 2.9798e-01, PNorm = 63.3079, GNorm = 1.5655, lr_0 = 1.6502e-04
Loss = 3.4429e-01, PNorm = 63.3089, GNorm = 1.3350, lr_0 = 1.6490e-04
Loss = 3.5037e-01, PNorm = 63.3100, GNorm = 1.3837, lr_0 = 1.6479e-04
Loss = 3.5314e-01, PNorm = 63.3109, GNorm = 1.5134, lr_0 = 1.6468e-04
Loss = 3.6526e-01, PNorm = 63.3131, GNorm = 1.1563, lr_0 = 1.6457e-04
Loss = 3.6217e-01, PNorm = 63.3141, GNorm = 1.7618, lr_0 = 1.6445e-04
Loss = 3.1947e-01, PNorm = 63.3145, GNorm = 1.4974, lr_0 = 1.6434e-04
Loss = 3.3443e-01, PNorm = 63.3163, GNorm = 2.0404, lr_0 = 1.6423e-04
Loss = 3.4267e-01, PNorm = 63.3191, GNorm = 1.5000, lr_0 = 1.6412e-04
Loss = 3.1035e-01, PNorm = 63.3228, GNorm = 1.5718, lr_0 = 1.6400e-04
Loss = 3.4804e-01, PNorm = 63.3255, GNorm = 1.7293, lr_0 = 1.6389e-04
Loss = 3.8034e-01, PNorm = 63.3261, GNorm = 1.1013, lr_0 = 1.6378e-04
Validation mae = 0.111523
Epoch 24
Loss = 3.3522e-01, PNorm = 63.3277, GNorm = 1.4746, lr_0 = 1.6367e-04
Loss = 3.8364e-01, PNorm = 63.3305, GNorm = 1.7887, lr_0 = 1.6355e-04
Loss = 3.4868e-01, PNorm = 63.3328, GNorm = 1.7570, lr_0 = 1.6344e-04
Loss = 2.8839e-01, PNorm = 63.3350, GNorm = 1.0185, lr_0 = 1.6333e-04
Loss = 3.7711e-01, PNorm = 63.3362, GNorm = 1.5953, lr_0 = 1.6322e-04
Loss = 3.6999e-01, PNorm = 63.3363, GNorm = 1.5268, lr_0 = 1.6311e-04
Loss = 3.7906e-01, PNorm = 63.3391, GNorm = 1.2746, lr_0 = 1.6299e-04
Loss = 3.2903e-01, PNorm = 63.3414, GNorm = 2.1429, lr_0 = 1.6288e-04
Loss = 3.1669e-01, PNorm = 63.3424, GNorm = 1.2147, lr_0 = 1.6277e-04
Loss = 3.5838e-01, PNorm = 63.3439, GNorm = 1.5600, lr_0 = 1.6266e-04
Loss = 3.6144e-01, PNorm = 63.3464, GNorm = 1.7818, lr_0 = 1.6255e-04
Loss = 3.2662e-01, PNorm = 63.3489, GNorm = 2.3776, lr_0 = 1.6244e-04
Loss = 3.5119e-01, PNorm = 63.3494, GNorm = 1.8321, lr_0 = 1.6233e-04
Loss = 3.8859e-01, PNorm = 63.3519, GNorm = 0.9355, lr_0 = 1.6221e-04
Loss = 3.1844e-01, PNorm = 63.3533, GNorm = 1.5579, lr_0 = 1.6210e-04
Loss = 3.5392e-01, PNorm = 63.3533, GNorm = 1.5197, lr_0 = 1.6199e-04
Loss = 3.2990e-01, PNorm = 63.3542, GNorm = 1.2638, lr_0 = 1.6188e-04
Loss = 3.6680e-01, PNorm = 63.3569, GNorm = 1.0945, lr_0 = 1.6177e-04
Loss = 3.6901e-01, PNorm = 63.3576, GNorm = 1.4586, lr_0 = 1.6166e-04
Loss = 3.6980e-01, PNorm = 63.3580, GNorm = 2.0701, lr_0 = 1.6155e-04
Loss = 3.3302e-01, PNorm = 63.3612, GNorm = 1.3801, lr_0 = 1.6144e-04
Loss = 3.8387e-01, PNorm = 63.3616, GNorm = 1.3896, lr_0 = 1.6133e-04
Loss = 3.4408e-01, PNorm = 63.3630, GNorm = 1.6137, lr_0 = 1.6122e-04
Loss = 4.1054e-01, PNorm = 63.3660, GNorm = 2.4214, lr_0 = 1.6111e-04
Loss = 3.6316e-01, PNorm = 63.3650, GNorm = 1.4983, lr_0 = 1.6100e-04
Loss = 3.0061e-01, PNorm = 63.3662, GNorm = 1.3842, lr_0 = 1.6089e-04
Loss = 3.8431e-01, PNorm = 63.3673, GNorm = 1.5359, lr_0 = 1.6078e-04
Loss = 3.2977e-01, PNorm = 63.3686, GNorm = 1.8618, lr_0 = 1.6067e-04
Loss = 3.1817e-01, PNorm = 63.3679, GNorm = 1.3965, lr_0 = 1.6056e-04
Loss = 3.5863e-01, PNorm = 63.3705, GNorm = 1.2207, lr_0 = 1.6045e-04
Loss = 3.5625e-01, PNorm = 63.3717, GNorm = 1.8444, lr_0 = 1.6034e-04
Loss = 3.5801e-01, PNorm = 63.3719, GNorm = 1.7100, lr_0 = 1.6023e-04
Loss = 3.8532e-01, PNorm = 63.3747, GNorm = 1.4729, lr_0 = 1.6012e-04
Loss = 3.3202e-01, PNorm = 63.3786, GNorm = 1.5691, lr_0 = 1.6001e-04
Loss = 4.2364e-01, PNorm = 63.3814, GNorm = 1.7119, lr_0 = 1.5990e-04
Loss = 3.4777e-01, PNorm = 63.3836, GNorm = 1.5385, lr_0 = 1.5979e-04
Loss = 4.0902e-01, PNorm = 63.3853, GNorm = 1.8055, lr_0 = 1.5968e-04
Loss = 3.1125e-01, PNorm = 63.3864, GNorm = 1.3251, lr_0 = 1.5957e-04
Loss = 3.3643e-01, PNorm = 63.3887, GNorm = 1.9149, lr_0 = 1.5946e-04
Loss = 3.8241e-01, PNorm = 63.3885, GNorm = 1.3811, lr_0 = 1.5935e-04
Loss = 3.7224e-01, PNorm = 63.3916, GNorm = 1.6369, lr_0 = 1.5924e-04
Loss = 3.3778e-01, PNorm = 63.3959, GNorm = 1.2322, lr_0 = 1.5913e-04
Loss = 3.5809e-01, PNorm = 63.3969, GNorm = 1.9314, lr_0 = 1.5902e-04
Loss = 3.3443e-01, PNorm = 63.3981, GNorm = 1.5234, lr_0 = 1.5891e-04
Loss = 3.3102e-01, PNorm = 63.4018, GNorm = 1.2084, lr_0 = 1.5880e-04
Loss = 3.1075e-01, PNorm = 63.4043, GNorm = 1.2968, lr_0 = 1.5870e-04
Loss = 3.9240e-01, PNorm = 63.4048, GNorm = 2.2418, lr_0 = 1.5859e-04
Loss = 3.6053e-01, PNorm = 63.4054, GNorm = 1.2974, lr_0 = 1.5848e-04
Loss = 3.3440e-01, PNorm = 63.4063, GNorm = 1.1580, lr_0 = 1.5837e-04
Loss = 3.2910e-01, PNorm = 63.4089, GNorm = 1.3939, lr_0 = 1.5826e-04
Loss = 3.7746e-01, PNorm = 63.4096, GNorm = 1.3283, lr_0 = 1.5815e-04
Loss = 3.4557e-01, PNorm = 63.4088, GNorm = 1.3601, lr_0 = 1.5804e-04
Loss = 3.4517e-01, PNorm = 63.4122, GNorm = 1.7649, lr_0 = 1.5794e-04
Loss = 3.7485e-01, PNorm = 63.4136, GNorm = 1.7099, lr_0 = 1.5783e-04
Loss = 3.6056e-01, PNorm = 63.4143, GNorm = 1.2417, lr_0 = 1.5772e-04
Loss = 3.4716e-01, PNorm = 63.4147, GNorm = 1.6017, lr_0 = 1.5761e-04
Loss = 3.5562e-01, PNorm = 63.4162, GNorm = 1.8969, lr_0 = 1.5750e-04
Loss = 3.9796e-01, PNorm = 63.4173, GNorm = 1.7165, lr_0 = 1.5740e-04
Loss = 3.2177e-01, PNorm = 63.4156, GNorm = 1.5296, lr_0 = 1.5729e-04
Loss = 3.5347e-01, PNorm = 63.4153, GNorm = 1.6119, lr_0 = 1.5718e-04
Loss = 3.6268e-01, PNorm = 63.4162, GNorm = 1.3616, lr_0 = 1.5707e-04
Loss = 3.2744e-01, PNorm = 63.4179, GNorm = 0.9985, lr_0 = 1.5697e-04
Loss = 3.4243e-01, PNorm = 63.4220, GNorm = 1.0816, lr_0 = 1.5686e-04
Loss = 3.6169e-01, PNorm = 63.4247, GNorm = 1.7198, lr_0 = 1.5675e-04
Loss = 2.9919e-01, PNorm = 63.4272, GNorm = 1.1637, lr_0 = 1.5664e-04
Loss = 3.6435e-01, PNorm = 63.4267, GNorm = 1.5130, lr_0 = 1.5654e-04
Loss = 3.4703e-01, PNorm = 63.4267, GNorm = 1.3362, lr_0 = 1.5643e-04
Loss = 3.6364e-01, PNorm = 63.4292, GNorm = 1.3996, lr_0 = 1.5632e-04
Loss = 3.5614e-01, PNorm = 63.4305, GNorm = 1.4803, lr_0 = 1.5621e-04
Loss = 3.4016e-01, PNorm = 63.4311, GNorm = 1.6720, lr_0 = 1.5611e-04
Loss = 3.4335e-01, PNorm = 63.4312, GNorm = 1.3064, lr_0 = 1.5600e-04
Loss = 3.4871e-01, PNorm = 63.4315, GNorm = 1.4298, lr_0 = 1.5589e-04
Loss = 3.2624e-01, PNorm = 63.4325, GNorm = 1.2820, lr_0 = 1.5579e-04
Loss = 3.6720e-01, PNorm = 63.4331, GNorm = 1.9369, lr_0 = 1.5568e-04
Loss = 3.5004e-01, PNorm = 63.4357, GNorm = 1.4909, lr_0 = 1.5557e-04
Loss = 4.3125e-01, PNorm = 63.4353, GNorm = 2.4899, lr_0 = 1.5547e-04
Loss = 3.3698e-01, PNorm = 63.4360, GNorm = 1.3167, lr_0 = 1.5536e-04
Loss = 3.8591e-01, PNorm = 63.4385, GNorm = 2.3821, lr_0 = 1.5525e-04
Loss = 3.9635e-01, PNorm = 63.4413, GNorm = 1.5191, lr_0 = 1.5515e-04
Loss = 3.2357e-01, PNorm = 63.4440, GNorm = 1.6577, lr_0 = 1.5504e-04
Loss = 2.7734e-01, PNorm = 63.4441, GNorm = 1.0069, lr_0 = 1.5493e-04
Loss = 3.8276e-01, PNorm = 63.4454, GNorm = 1.7703, lr_0 = 1.5483e-04
Loss = 3.4941e-01, PNorm = 63.4479, GNorm = 1.5652, lr_0 = 1.5472e-04
Loss = 3.4911e-01, PNorm = 63.4489, GNorm = 1.6517, lr_0 = 1.5462e-04
Loss = 2.8411e-01, PNorm = 63.4504, GNorm = 1.6392, lr_0 = 1.5451e-04
Loss = 3.6182e-01, PNorm = 63.4531, GNorm = 1.4694, lr_0 = 1.5440e-04
Loss = 3.4831e-01, PNorm = 63.4522, GNorm = 1.4914, lr_0 = 1.5430e-04
Loss = 3.4016e-01, PNorm = 63.4531, GNorm = 1.5633, lr_0 = 1.5419e-04
Loss = 3.9348e-01, PNorm = 63.4563, GNorm = 1.2701, lr_0 = 1.5409e-04
Loss = 3.6713e-01, PNorm = 63.4580, GNorm = 1.3843, lr_0 = 1.5398e-04
Loss = 3.1867e-01, PNorm = 63.4611, GNorm = 2.5293, lr_0 = 1.5388e-04
Loss = 3.4279e-01, PNorm = 63.4620, GNorm = 1.2404, lr_0 = 1.5377e-04
Loss = 3.3317e-01, PNorm = 63.4613, GNorm = 2.0710, lr_0 = 1.5367e-04
Loss = 3.7158e-01, PNorm = 63.4619, GNorm = 1.6067, lr_0 = 1.5356e-04
Loss = 3.3628e-01, PNorm = 63.4651, GNorm = 1.2694, lr_0 = 1.5346e-04
Loss = 3.7393e-01, PNorm = 63.4678, GNorm = 1.6307, lr_0 = 1.5335e-04
Loss = 3.3051e-01, PNorm = 63.4704, GNorm = 1.7249, lr_0 = 1.5325e-04
Loss = 3.2659e-01, PNorm = 63.4716, GNorm = 1.7742, lr_0 = 1.5314e-04
Loss = 3.6306e-01, PNorm = 63.4739, GNorm = 1.3230, lr_0 = 1.5304e-04
Loss = 3.2527e-01, PNorm = 63.4738, GNorm = 1.0176, lr_0 = 1.5293e-04
Loss = 3.3739e-01, PNorm = 63.4768, GNorm = 1.7210, lr_0 = 1.5283e-04
Loss = 4.1645e-01, PNorm = 63.4778, GNorm = 2.0846, lr_0 = 1.5272e-04
Loss = 3.2209e-01, PNorm = 63.4828, GNorm = 1.4755, lr_0 = 1.5262e-04
Loss = 3.8655e-01, PNorm = 63.4867, GNorm = 1.6034, lr_0 = 1.5251e-04
Loss = 3.8529e-01, PNorm = 63.4873, GNorm = 1.5122, lr_0 = 1.5241e-04
Loss = 3.7306e-01, PNorm = 63.4889, GNorm = 1.8606, lr_0 = 1.5230e-04
Loss = 4.1201e-01, PNorm = 63.4909, GNorm = 2.0838, lr_0 = 1.5220e-04
Loss = 3.4301e-01, PNorm = 63.4925, GNorm = 1.2804, lr_0 = 1.5209e-04
Loss = 3.6978e-01, PNorm = 63.4939, GNorm = 1.2051, lr_0 = 1.5199e-04
Loss = 3.5011e-01, PNorm = 63.4942, GNorm = 1.6972, lr_0 = 1.5189e-04
Loss = 3.2870e-01, PNorm = 63.4955, GNorm = 0.9907, lr_0 = 1.5178e-04
Loss = 3.4020e-01, PNorm = 63.4981, GNorm = 2.1561, lr_0 = 1.5168e-04
Loss = 3.4651e-01, PNorm = 63.4981, GNorm = 1.2471, lr_0 = 1.5157e-04
Loss = 3.8080e-01, PNorm = 63.4992, GNorm = 1.3646, lr_0 = 1.5147e-04
Loss = 3.2788e-01, PNorm = 63.4990, GNorm = 1.7843, lr_0 = 1.5137e-04
Loss = 3.4968e-01, PNorm = 63.4998, GNorm = 0.8962, lr_0 = 1.5126e-04
Loss = 3.4193e-01, PNorm = 63.5007, GNorm = 1.3570, lr_0 = 1.5116e-04
Loss = 3.5775e-01, PNorm = 63.5029, GNorm = 1.1457, lr_0 = 1.5106e-04
Loss = 3.5801e-01, PNorm = 63.5041, GNorm = 1.3715, lr_0 = 1.5095e-04
Loss = 3.3414e-01, PNorm = 63.5049, GNorm = 1.3414, lr_0 = 1.5085e-04
Validation mae = 0.112119
Epoch 25
Loss = 3.3625e-01, PNorm = 63.5068, GNorm = 1.3769, lr_0 = 1.5075e-04
Loss = 3.8247e-01, PNorm = 63.5065, GNorm = 1.6390, lr_0 = 1.5064e-04
Loss = 3.3741e-01, PNorm = 63.5073, GNorm = 2.0986, lr_0 = 1.5054e-04
Loss = 3.8417e-01, PNorm = 63.5072, GNorm = 1.1693, lr_0 = 1.5044e-04
Loss = 3.5200e-01, PNorm = 63.5086, GNorm = 1.5733, lr_0 = 1.5033e-04
Loss = 3.7348e-01, PNorm = 63.5092, GNorm = 1.7793, lr_0 = 1.5023e-04
Loss = 3.1730e-01, PNorm = 63.5097, GNorm = 1.4805, lr_0 = 1.5013e-04
Loss = 2.9868e-01, PNorm = 63.5117, GNorm = 1.0282, lr_0 = 1.5002e-04
Loss = 3.9483e-01, PNorm = 63.5125, GNorm = 1.2800, lr_0 = 1.4992e-04
Loss = 3.3185e-01, PNorm = 63.5129, GNorm = 1.4856, lr_0 = 1.4982e-04
Loss = 3.0350e-01, PNorm = 63.5152, GNorm = 1.4353, lr_0 = 1.4972e-04
Loss = 3.4134e-01, PNorm = 63.5180, GNorm = 1.4812, lr_0 = 1.4961e-04
Loss = 3.2930e-01, PNorm = 63.5186, GNorm = 1.7751, lr_0 = 1.4951e-04
Loss = 2.9247e-01, PNorm = 63.5206, GNorm = 1.3773, lr_0 = 1.4941e-04
Loss = 3.8060e-01, PNorm = 63.5203, GNorm = 1.8453, lr_0 = 1.4931e-04
Loss = 3.3150e-01, PNorm = 63.5205, GNorm = 1.3939, lr_0 = 1.4920e-04
Loss = 3.1388e-01, PNorm = 63.5218, GNorm = 1.5934, lr_0 = 1.4910e-04
Loss = 3.4379e-01, PNorm = 63.5233, GNorm = 2.5508, lr_0 = 1.4900e-04
Loss = 3.1833e-01, PNorm = 63.5254, GNorm = 1.2837, lr_0 = 1.4890e-04
Loss = 3.7555e-01, PNorm = 63.5262, GNorm = 1.9039, lr_0 = 1.4880e-04
Loss = 3.6227e-01, PNorm = 63.5267, GNorm = 1.4863, lr_0 = 1.4869e-04
Loss = 3.4506e-01, PNorm = 63.5282, GNorm = 1.6156, lr_0 = 1.4859e-04
Loss = 3.4970e-01, PNorm = 63.5288, GNorm = 1.9012, lr_0 = 1.4849e-04
Loss = 3.0818e-01, PNorm = 63.5290, GNorm = 1.4576, lr_0 = 1.4839e-04
Loss = 3.7994e-01, PNorm = 63.5295, GNorm = 1.5574, lr_0 = 1.4829e-04
Loss = 3.2918e-01, PNorm = 63.5306, GNorm = 1.3190, lr_0 = 1.4818e-04
Loss = 4.1135e-01, PNorm = 63.5311, GNorm = 1.2938, lr_0 = 1.4808e-04
Loss = 3.5523e-01, PNorm = 63.5340, GNorm = 1.3552, lr_0 = 1.4798e-04
Loss = 3.2993e-01, PNorm = 63.5359, GNorm = 1.0587, lr_0 = 1.4788e-04
Loss = 3.2197e-01, PNorm = 63.5388, GNorm = 1.2946, lr_0 = 1.4778e-04
Loss = 3.7611e-01, PNorm = 63.5410, GNorm = 1.5441, lr_0 = 1.4768e-04
Loss = 3.8902e-01, PNorm = 63.5429, GNorm = 1.9293, lr_0 = 1.4758e-04
Loss = 3.6171e-01, PNorm = 63.5444, GNorm = 1.2100, lr_0 = 1.4748e-04
Loss = 3.6031e-01, PNorm = 63.5443, GNorm = 1.4730, lr_0 = 1.4737e-04
Loss = 3.3569e-01, PNorm = 63.5447, GNorm = 1.5414, lr_0 = 1.4727e-04
Loss = 4.0302e-01, PNorm = 63.5444, GNorm = 1.7414, lr_0 = 1.4717e-04
Loss = 3.3986e-01, PNorm = 63.5473, GNorm = 1.7595, lr_0 = 1.4707e-04
Loss = 3.4691e-01, PNorm = 63.5492, GNorm = 1.8000, lr_0 = 1.4697e-04
Loss = 3.6271e-01, PNorm = 63.5485, GNorm = 1.4673, lr_0 = 1.4687e-04
Loss = 3.4767e-01, PNorm = 63.5525, GNorm = 1.4337, lr_0 = 1.4677e-04
Loss = 3.4081e-01, PNorm = 63.5555, GNorm = 1.4153, lr_0 = 1.4667e-04
Loss = 3.2947e-01, PNorm = 63.5566, GNorm = 1.1265, lr_0 = 1.4657e-04
Loss = 3.3252e-01, PNorm = 63.5559, GNorm = 1.5034, lr_0 = 1.4647e-04
Loss = 3.9017e-01, PNorm = 63.5583, GNorm = 2.3879, lr_0 = 1.4637e-04
Loss = 3.7530e-01, PNorm = 63.5602, GNorm = 2.0027, lr_0 = 1.4627e-04
Loss = 3.5113e-01, PNorm = 63.5614, GNorm = 2.1875, lr_0 = 1.4617e-04
Loss = 3.7327e-01, PNorm = 63.5629, GNorm = 1.8114, lr_0 = 1.4607e-04
Loss = 4.0917e-01, PNorm = 63.5633, GNorm = 1.3051, lr_0 = 1.4597e-04
Loss = 3.5911e-01, PNorm = 63.5656, GNorm = 1.1079, lr_0 = 1.4587e-04
Loss = 3.2558e-01, PNorm = 63.5682, GNorm = 2.3943, lr_0 = 1.4577e-04
Loss = 3.9044e-01, PNorm = 63.5711, GNorm = 1.4073, lr_0 = 1.4567e-04
Loss = 3.3405e-01, PNorm = 63.5733, GNorm = 1.6782, lr_0 = 1.4557e-04
Loss = 3.0919e-01, PNorm = 63.5743, GNorm = 1.6202, lr_0 = 1.4547e-04
Loss = 3.7963e-01, PNorm = 63.5755, GNorm = 1.6662, lr_0 = 1.4537e-04
Loss = 3.2894e-01, PNorm = 63.5764, GNorm = 1.5128, lr_0 = 1.4527e-04
Loss = 3.5035e-01, PNorm = 63.5789, GNorm = 1.7318, lr_0 = 1.4517e-04
Loss = 3.6678e-01, PNorm = 63.5814, GNorm = 1.6338, lr_0 = 1.4507e-04
Loss = 3.3905e-01, PNorm = 63.5835, GNorm = 1.8301, lr_0 = 1.4497e-04
Loss = 3.4538e-01, PNorm = 63.5839, GNorm = 1.5736, lr_0 = 1.4487e-04
Loss = 3.4205e-01, PNorm = 63.5850, GNorm = 1.7698, lr_0 = 1.4477e-04
Loss = 3.3190e-01, PNorm = 63.5864, GNorm = 1.3466, lr_0 = 1.4467e-04
Loss = 3.2652e-01, PNorm = 63.5876, GNorm = 1.3572, lr_0 = 1.4457e-04
Loss = 3.5696e-01, PNorm = 63.5908, GNorm = 1.2444, lr_0 = 1.4447e-04
Loss = 3.5077e-01, PNorm = 63.5942, GNorm = 1.1627, lr_0 = 1.4438e-04
Loss = 3.7590e-01, PNorm = 63.5952, GNorm = 1.4053, lr_0 = 1.4428e-04
Loss = 4.1070e-01, PNorm = 63.5944, GNorm = 1.7593, lr_0 = 1.4418e-04
Loss = 3.7675e-01, PNorm = 63.5960, GNorm = 1.4632, lr_0 = 1.4408e-04
Loss = 3.2415e-01, PNorm = 63.5972, GNorm = 1.4085, lr_0 = 1.4398e-04
Loss = 3.6196e-01, PNorm = 63.5983, GNorm = 1.3078, lr_0 = 1.4388e-04
Loss = 3.8351e-01, PNorm = 63.5974, GNorm = 1.2673, lr_0 = 1.4378e-04
Loss = 3.3326e-01, PNorm = 63.5986, GNorm = 1.2135, lr_0 = 1.4368e-04
Loss = 3.3293e-01, PNorm = 63.6008, GNorm = 1.4906, lr_0 = 1.4359e-04
Loss = 3.6709e-01, PNorm = 63.6029, GNorm = 1.7485, lr_0 = 1.4349e-04
Loss = 3.1185e-01, PNorm = 63.6022, GNorm = 1.1044, lr_0 = 1.4339e-04
Loss = 3.7862e-01, PNorm = 63.6050, GNorm = 1.5106, lr_0 = 1.4329e-04
Loss = 3.2847e-01, PNorm = 63.6057, GNorm = 1.2140, lr_0 = 1.4319e-04
Loss = 3.7779e-01, PNorm = 63.6057, GNorm = 1.4979, lr_0 = 1.4310e-04
Loss = 3.4085e-01, PNorm = 63.6067, GNorm = 1.5822, lr_0 = 1.4300e-04
Loss = 3.7933e-01, PNorm = 63.6088, GNorm = 1.6898, lr_0 = 1.4290e-04
Loss = 3.7939e-01, PNorm = 63.6110, GNorm = 1.2540, lr_0 = 1.4280e-04
Loss = 3.5902e-01, PNorm = 63.6117, GNorm = 1.4623, lr_0 = 1.4270e-04
Loss = 3.6513e-01, PNorm = 63.6119, GNorm = 1.7875, lr_0 = 1.4261e-04
Loss = 3.4966e-01, PNorm = 63.6112, GNorm = 1.4734, lr_0 = 1.4251e-04
Loss = 3.7198e-01, PNorm = 63.6141, GNorm = 1.5435, lr_0 = 1.4241e-04
Loss = 3.5782e-01, PNorm = 63.6164, GNorm = 1.2098, lr_0 = 1.4231e-04
Loss = 3.2880e-01, PNorm = 63.6167, GNorm = 1.3776, lr_0 = 1.4222e-04
Loss = 3.3292e-01, PNorm = 63.6178, GNorm = 1.4589, lr_0 = 1.4212e-04
Loss = 3.0688e-01, PNorm = 63.6193, GNorm = 1.5938, lr_0 = 1.4202e-04
Loss = 3.7997e-01, PNorm = 63.6199, GNorm = 1.4499, lr_0 = 1.4192e-04
Loss = 3.3281e-01, PNorm = 63.6208, GNorm = 1.3470, lr_0 = 1.4183e-04
Loss = 3.0918e-01, PNorm = 63.6238, GNorm = 1.5270, lr_0 = 1.4173e-04
Loss = 3.1948e-01, PNorm = 63.6255, GNorm = 1.1291, lr_0 = 1.4163e-04
Loss = 2.9080e-01, PNorm = 63.6262, GNorm = 1.1398, lr_0 = 1.4153e-04
Loss = 3.7184e-01, PNorm = 63.6276, GNorm = 2.1140, lr_0 = 1.4144e-04
Loss = 3.1476e-01, PNorm = 63.6295, GNorm = 1.4073, lr_0 = 1.4134e-04
Loss = 3.0906e-01, PNorm = 63.6305, GNorm = 1.4869, lr_0 = 1.4124e-04
Loss = 3.6868e-01, PNorm = 63.6314, GNorm = 1.7676, lr_0 = 1.4115e-04
Loss = 3.5861e-01, PNorm = 63.6335, GNorm = 1.7023, lr_0 = 1.4105e-04
Loss = 3.6855e-01, PNorm = 63.6342, GNorm = 1.8875, lr_0 = 1.4095e-04
Loss = 3.3905e-01, PNorm = 63.6371, GNorm = 1.1967, lr_0 = 1.4086e-04
Loss = 3.5405e-01, PNorm = 63.6383, GNorm = 1.6248, lr_0 = 1.4076e-04
Loss = 3.3742e-01, PNorm = 63.6403, GNorm = 2.3618, lr_0 = 1.4066e-04
Loss = 3.3280e-01, PNorm = 63.6431, GNorm = 2.1040, lr_0 = 1.4057e-04
Loss = 3.2355e-01, PNorm = 63.6423, GNorm = 1.6114, lr_0 = 1.4047e-04
Loss = 3.6962e-01, PNorm = 63.6435, GNorm = 1.4930, lr_0 = 1.4038e-04
Loss = 3.2544e-01, PNorm = 63.6443, GNorm = 1.3736, lr_0 = 1.4028e-04
Loss = 3.6201e-01, PNorm = 63.6432, GNorm = 1.4947, lr_0 = 1.4018e-04
Loss = 3.4913e-01, PNorm = 63.6448, GNorm = 1.3959, lr_0 = 1.4009e-04
Loss = 3.1580e-01, PNorm = 63.6465, GNorm = 1.1064, lr_0 = 1.3999e-04
Loss = 3.2621e-01, PNorm = 63.6491, GNorm = 1.8889, lr_0 = 1.3990e-04
Loss = 3.3372e-01, PNorm = 63.6514, GNorm = 2.1101, lr_0 = 1.3980e-04
Loss = 3.9212e-01, PNorm = 63.6519, GNorm = 1.3228, lr_0 = 1.3970e-04
Loss = 3.6212e-01, PNorm = 63.6514, GNorm = 1.2980, lr_0 = 1.3961e-04
Loss = 3.3834e-01, PNorm = 63.6528, GNorm = 1.4292, lr_0 = 1.3951e-04
Loss = 3.8615e-01, PNorm = 63.6559, GNorm = 1.0709, lr_0 = 1.3942e-04
Loss = 3.4159e-01, PNorm = 63.6577, GNorm = 1.6785, lr_0 = 1.3932e-04
Loss = 3.4403e-01, PNorm = 63.6573, GNorm = 1.4167, lr_0 = 1.3923e-04
Loss = 3.7356e-01, PNorm = 63.6578, GNorm = 1.8738, lr_0 = 1.3913e-04
Loss = 3.4346e-01, PNorm = 63.6590, GNorm = 1.6310, lr_0 = 1.3904e-04
Loss = 3.7845e-01, PNorm = 63.6608, GNorm = 1.7299, lr_0 = 1.3894e-04
Validation mae = 0.111685
Epoch 26
Loss = 3.3531e-01, PNorm = 63.6595, GNorm = 1.5016, lr_0 = 1.3884e-04
Loss = 3.7193e-01, PNorm = 63.6605, GNorm = 1.2442, lr_0 = 1.3875e-04
Loss = 3.2481e-01, PNorm = 63.6616, GNorm = 1.9358, lr_0 = 1.3865e-04
Loss = 3.3358e-01, PNorm = 63.6613, GNorm = 1.4069, lr_0 = 1.3856e-04
Loss = 3.9707e-01, PNorm = 63.6634, GNorm = 1.4302, lr_0 = 1.3846e-04
Loss = 3.5894e-01, PNorm = 63.6641, GNorm = 1.1020, lr_0 = 1.3837e-04
Loss = 3.6967e-01, PNorm = 63.6656, GNorm = 1.9612, lr_0 = 1.3828e-04
Loss = 3.1474e-01, PNorm = 63.6654, GNorm = 1.4275, lr_0 = 1.3818e-04
Loss = 4.0619e-01, PNorm = 63.6651, GNorm = 1.4921, lr_0 = 1.3809e-04
Loss = 3.3107e-01, PNorm = 63.6674, GNorm = 1.5421, lr_0 = 1.3799e-04
Loss = 2.9515e-01, PNorm = 63.6699, GNorm = 1.3412, lr_0 = 1.3790e-04
Loss = 3.7164e-01, PNorm = 63.6709, GNorm = 1.5305, lr_0 = 1.3780e-04
Loss = 3.2653e-01, PNorm = 63.6743, GNorm = 1.7765, lr_0 = 1.3771e-04
Loss = 3.4176e-01, PNorm = 63.6749, GNorm = 1.3213, lr_0 = 1.3761e-04
Loss = 3.4910e-01, PNorm = 63.6759, GNorm = 1.1785, lr_0 = 1.3752e-04
Loss = 3.4752e-01, PNorm = 63.6788, GNorm = 1.1676, lr_0 = 1.3742e-04
Loss = 3.5416e-01, PNorm = 63.6808, GNorm = 1.6671, lr_0 = 1.3733e-04
Loss = 3.3841e-01, PNorm = 63.6796, GNorm = 1.4309, lr_0 = 1.3724e-04
Loss = 3.3985e-01, PNorm = 63.6807, GNorm = 1.5249, lr_0 = 1.3714e-04
Loss = 3.1381e-01, PNorm = 63.6826, GNorm = 1.5475, lr_0 = 1.3705e-04
Loss = 3.0010e-01, PNorm = 63.6835, GNorm = 1.0450, lr_0 = 1.3695e-04
Loss = 3.6906e-01, PNorm = 63.6846, GNorm = 1.7798, lr_0 = 1.3686e-04
Loss = 3.5294e-01, PNorm = 63.6857, GNorm = 1.1693, lr_0 = 1.3677e-04
Loss = 3.6898e-01, PNorm = 63.6857, GNorm = 1.2639, lr_0 = 1.3667e-04
Loss = 3.5371e-01, PNorm = 63.6853, GNorm = 1.5929, lr_0 = 1.3658e-04
Loss = 2.9200e-01, PNorm = 63.6853, GNorm = 1.0529, lr_0 = 1.3649e-04
Loss = 3.2109e-01, PNorm = 63.6868, GNorm = 1.7919, lr_0 = 1.3639e-04
Loss = 3.4621e-01, PNorm = 63.6902, GNorm = 1.1215, lr_0 = 1.3630e-04
Loss = 4.0331e-01, PNorm = 63.6921, GNorm = 1.3721, lr_0 = 1.3621e-04
Loss = 3.1536e-01, PNorm = 63.6935, GNorm = 1.6987, lr_0 = 1.3611e-04
Loss = 3.2454e-01, PNorm = 63.6973, GNorm = 1.4088, lr_0 = 1.3602e-04
Loss = 3.5415e-01, PNorm = 63.6982, GNorm = 1.7299, lr_0 = 1.3593e-04
Loss = 3.0776e-01, PNorm = 63.6998, GNorm = 1.1249, lr_0 = 1.3583e-04
Loss = 3.1174e-01, PNorm = 63.6999, GNorm = 1.0955, lr_0 = 1.3574e-04
Loss = 3.1701e-01, PNorm = 63.7011, GNorm = 1.8623, lr_0 = 1.3565e-04
Loss = 3.6598e-01, PNorm = 63.7020, GNorm = 2.1516, lr_0 = 1.3555e-04
Loss = 3.4127e-01, PNorm = 63.7030, GNorm = 1.6806, lr_0 = 1.3546e-04
Loss = 3.2469e-01, PNorm = 63.7030, GNorm = 1.6624, lr_0 = 1.3537e-04
Loss = 3.8575e-01, PNorm = 63.7042, GNorm = 1.8524, lr_0 = 1.3528e-04
Loss = 3.7969e-01, PNorm = 63.7058, GNorm = 1.2789, lr_0 = 1.3518e-04
Loss = 3.5325e-01, PNorm = 63.7066, GNorm = 1.9208, lr_0 = 1.3509e-04
Loss = 3.3164e-01, PNorm = 63.7090, GNorm = 1.3738, lr_0 = 1.3500e-04
Loss = 3.0612e-01, PNorm = 63.7092, GNorm = 1.0462, lr_0 = 1.3491e-04
Loss = 3.0466e-01, PNorm = 63.7088, GNorm = 1.9179, lr_0 = 1.3481e-04
Loss = 3.3865e-01, PNorm = 63.7094, GNorm = 1.3758, lr_0 = 1.3472e-04
Loss = 4.0762e-01, PNorm = 63.7108, GNorm = 1.3593, lr_0 = 1.3463e-04
Loss = 3.4279e-01, PNorm = 63.7137, GNorm = 1.1773, lr_0 = 1.3454e-04
Loss = 3.3803e-01, PNorm = 63.7144, GNorm = 1.4919, lr_0 = 1.3444e-04
Loss = 2.9283e-01, PNorm = 63.7176, GNorm = 1.5048, lr_0 = 1.3435e-04
Loss = 3.5048e-01, PNorm = 63.7195, GNorm = 2.1672, lr_0 = 1.3426e-04
Loss = 3.4106e-01, PNorm = 63.7191, GNorm = 1.4058, lr_0 = 1.3417e-04
Loss = 3.5890e-01, PNorm = 63.7211, GNorm = 2.2433, lr_0 = 1.3408e-04
Loss = 3.3160e-01, PNorm = 63.7248, GNorm = 1.6707, lr_0 = 1.3398e-04
Loss = 3.7078e-01, PNorm = 63.7257, GNorm = 1.8057, lr_0 = 1.3389e-04
Loss = 3.2148e-01, PNorm = 63.7253, GNorm = 1.2586, lr_0 = 1.3380e-04
Loss = 3.7040e-01, PNorm = 63.7260, GNorm = 1.3908, lr_0 = 1.3371e-04
Loss = 3.3645e-01, PNorm = 63.7275, GNorm = 1.4323, lr_0 = 1.3362e-04
Loss = 3.5137e-01, PNorm = 63.7292, GNorm = 1.4843, lr_0 = 1.3353e-04
Loss = 3.7689e-01, PNorm = 63.7293, GNorm = 1.8712, lr_0 = 1.3343e-04
Loss = 3.2312e-01, PNorm = 63.7312, GNorm = 1.4264, lr_0 = 1.3334e-04
Loss = 3.3452e-01, PNorm = 63.7323, GNorm = 1.4114, lr_0 = 1.3325e-04
Loss = 3.7724e-01, PNorm = 63.7327, GNorm = 1.3064, lr_0 = 1.3316e-04
Loss = 3.5972e-01, PNorm = 63.7349, GNorm = 1.3276, lr_0 = 1.3307e-04
Loss = 3.3556e-01, PNorm = 63.7357, GNorm = 1.7939, lr_0 = 1.3298e-04
Loss = 2.9396e-01, PNorm = 63.7359, GNorm = 1.0677, lr_0 = 1.3289e-04
Loss = 3.6922e-01, PNorm = 63.7372, GNorm = 1.7608, lr_0 = 1.3280e-04
Loss = 3.4120e-01, PNorm = 63.7371, GNorm = 1.8813, lr_0 = 1.3270e-04
Loss = 3.3981e-01, PNorm = 63.7376, GNorm = 1.2334, lr_0 = 1.3261e-04
Loss = 3.6188e-01, PNorm = 63.7380, GNorm = 1.3640, lr_0 = 1.3252e-04
Loss = 3.4774e-01, PNorm = 63.7402, GNorm = 1.2996, lr_0 = 1.3243e-04
Loss = 3.7366e-01, PNorm = 63.7407, GNorm = 1.9350, lr_0 = 1.3234e-04
Loss = 3.4369e-01, PNorm = 63.7412, GNorm = 1.3181, lr_0 = 1.3225e-04
Loss = 3.6911e-01, PNorm = 63.7426, GNorm = 1.7054, lr_0 = 1.3216e-04
Loss = 3.7718e-01, PNorm = 63.7450, GNorm = 1.6259, lr_0 = 1.3207e-04
Loss = 3.1135e-01, PNorm = 63.7485, GNorm = 1.4025, lr_0 = 1.3198e-04
Loss = 3.6267e-01, PNorm = 63.7501, GNorm = 1.3707, lr_0 = 1.3189e-04
Loss = 3.3614e-01, PNorm = 63.7504, GNorm = 2.1405, lr_0 = 1.3180e-04
Loss = 3.4513e-01, PNorm = 63.7498, GNorm = 1.3749, lr_0 = 1.3171e-04
Loss = 3.5827e-01, PNorm = 63.7510, GNorm = 1.4727, lr_0 = 1.3162e-04
Loss = 3.3040e-01, PNorm = 63.7530, GNorm = 1.2026, lr_0 = 1.3153e-04
Loss = 3.4339e-01, PNorm = 63.7558, GNorm = 1.9369, lr_0 = 1.3144e-04
Loss = 3.2411e-01, PNorm = 63.7579, GNorm = 1.3834, lr_0 = 1.3135e-04
Loss = 3.1914e-01, PNorm = 63.7573, GNorm = 1.4934, lr_0 = 1.3126e-04
Loss = 3.2055e-01, PNorm = 63.7561, GNorm = 1.1742, lr_0 = 1.3117e-04
Loss = 3.6118e-01, PNorm = 63.7571, GNorm = 1.1992, lr_0 = 1.3108e-04
Loss = 3.3014e-01, PNorm = 63.7582, GNorm = 1.7765, lr_0 = 1.3099e-04
Loss = 3.2701e-01, PNorm = 63.7581, GNorm = 1.3723, lr_0 = 1.3090e-04
Loss = 3.2377e-01, PNorm = 63.7596, GNorm = 1.6280, lr_0 = 1.3081e-04
Loss = 3.3609e-01, PNorm = 63.7606, GNorm = 1.8636, lr_0 = 1.3072e-04
Loss = 3.1431e-01, PNorm = 63.7601, GNorm = 1.4700, lr_0 = 1.3063e-04
Loss = 4.0118e-01, PNorm = 63.7605, GNorm = 1.0239, lr_0 = 1.3054e-04
Loss = 4.1428e-01, PNorm = 63.7619, GNorm = 1.3071, lr_0 = 1.3045e-04
Loss = 3.3580e-01, PNorm = 63.7628, GNorm = 2.0610, lr_0 = 1.3036e-04
Loss = 3.6388e-01, PNorm = 63.7639, GNorm = 1.5480, lr_0 = 1.3027e-04
Loss = 3.8221e-01, PNorm = 63.7643, GNorm = 1.6273, lr_0 = 1.3018e-04
Loss = 3.2522e-01, PNorm = 63.7654, GNorm = 1.7031, lr_0 = 1.3009e-04
Loss = 3.1691e-01, PNorm = 63.7657, GNorm = 1.3827, lr_0 = 1.3000e-04
Loss = 3.5504e-01, PNorm = 63.7668, GNorm = 1.5281, lr_0 = 1.2992e-04
Loss = 3.5007e-01, PNorm = 63.7667, GNorm = 1.3709, lr_0 = 1.2983e-04
Loss = 3.5255e-01, PNorm = 63.7690, GNorm = 1.4820, lr_0 = 1.2974e-04
Loss = 2.9284e-01, PNorm = 63.7718, GNorm = 1.1220, lr_0 = 1.2965e-04
Loss = 3.6231e-01, PNorm = 63.7714, GNorm = 1.3899, lr_0 = 1.2956e-04
Loss = 3.7361e-01, PNorm = 63.7737, GNorm = 1.6552, lr_0 = 1.2947e-04
Loss = 3.5326e-01, PNorm = 63.7767, GNorm = 1.7473, lr_0 = 1.2938e-04
Loss = 4.1141e-01, PNorm = 63.7794, GNorm = 1.5641, lr_0 = 1.2929e-04
Loss = 3.6526e-01, PNorm = 63.7814, GNorm = 1.7487, lr_0 = 1.2921e-04
Loss = 3.6901e-01, PNorm = 63.7814, GNorm = 1.8913, lr_0 = 1.2912e-04
Loss = 3.4269e-01, PNorm = 63.7814, GNorm = 1.6303, lr_0 = 1.2903e-04
Loss = 3.1690e-01, PNorm = 63.7816, GNorm = 1.7876, lr_0 = 1.2894e-04
Loss = 3.5700e-01, PNorm = 63.7807, GNorm = 1.4775, lr_0 = 1.2885e-04
Loss = 3.0208e-01, PNorm = 63.7819, GNorm = 1.4478, lr_0 = 1.2876e-04
Loss = 3.5149e-01, PNorm = 63.7852, GNorm = 1.1482, lr_0 = 1.2867e-04
Loss = 3.5940e-01, PNorm = 63.7875, GNorm = 2.0729, lr_0 = 1.2859e-04
Loss = 3.9955e-01, PNorm = 63.7892, GNorm = 1.6011, lr_0 = 1.2850e-04
Loss = 3.7663e-01, PNorm = 63.7904, GNorm = 1.5336, lr_0 = 1.2841e-04
Loss = 3.5114e-01, PNorm = 63.7909, GNorm = 1.4094, lr_0 = 1.2832e-04
Loss = 3.0957e-01, PNorm = 63.7915, GNorm = 1.3098, lr_0 = 1.2823e-04
Loss = 3.1331e-01, PNorm = 63.7921, GNorm = 1.1837, lr_0 = 1.2815e-04
Loss = 3.6495e-01, PNorm = 63.7939, GNorm = 1.6187, lr_0 = 1.2806e-04
Loss = 3.4611e-01, PNorm = 63.7947, GNorm = 1.1022, lr_0 = 1.2797e-04
Validation mae = 0.110679
Epoch 27
Loss = 3.8515e-01, PNorm = 63.7949, GNorm = 1.7896, lr_0 = 1.2788e-04
Loss = 3.9056e-01, PNorm = 63.7974, GNorm = 1.8473, lr_0 = 1.2780e-04
Loss = 3.4944e-01, PNorm = 63.7986, GNorm = 1.9090, lr_0 = 1.2771e-04
Loss = 3.5359e-01, PNorm = 63.8006, GNorm = 0.8850, lr_0 = 1.2762e-04
Loss = 3.4160e-01, PNorm = 63.8038, GNorm = 2.0915, lr_0 = 1.2753e-04
Loss = 3.1329e-01, PNorm = 63.8055, GNorm = 1.8561, lr_0 = 1.2745e-04
Loss = 4.1595e-01, PNorm = 63.8086, GNorm = 1.5160, lr_0 = 1.2736e-04
Loss = 3.0620e-01, PNorm = 63.8118, GNorm = 1.2399, lr_0 = 1.2727e-04
Loss = 3.1398e-01, PNorm = 63.8138, GNorm = 1.8552, lr_0 = 1.2718e-04
Loss = 3.3945e-01, PNorm = 63.8153, GNorm = 1.5063, lr_0 = 1.2710e-04
Loss = 3.3008e-01, PNorm = 63.8159, GNorm = 1.5651, lr_0 = 1.2701e-04
Loss = 3.3236e-01, PNorm = 63.8172, GNorm = 1.6926, lr_0 = 1.2692e-04
Loss = 3.6990e-01, PNorm = 63.8183, GNorm = 1.6012, lr_0 = 1.2684e-04
Loss = 3.3953e-01, PNorm = 63.8195, GNorm = 1.2187, lr_0 = 1.2675e-04
Loss = 3.3067e-01, PNorm = 63.8207, GNorm = 1.5961, lr_0 = 1.2666e-04
Loss = 3.6391e-01, PNorm = 63.8209, GNorm = 1.7308, lr_0 = 1.2658e-04
Loss = 3.5476e-01, PNorm = 63.8214, GNorm = 1.2372, lr_0 = 1.2649e-04
Loss = 3.5791e-01, PNorm = 63.8221, GNorm = 1.5471, lr_0 = 1.2640e-04
Loss = 3.0289e-01, PNorm = 63.8231, GNorm = 1.5801, lr_0 = 1.2632e-04
Loss = 3.7767e-01, PNorm = 63.8256, GNorm = 1.4516, lr_0 = 1.2623e-04
Loss = 3.5686e-01, PNorm = 63.8281, GNorm = 1.4510, lr_0 = 1.2614e-04
Loss = 3.1527e-01, PNorm = 63.8280, GNorm = 1.2768, lr_0 = 1.2606e-04
Loss = 2.7552e-01, PNorm = 63.8283, GNorm = 1.4843, lr_0 = 1.2597e-04
Loss = 3.4681e-01, PNorm = 63.8299, GNorm = 1.4691, lr_0 = 1.2588e-04
Loss = 3.2386e-01, PNorm = 63.8308, GNorm = 1.0886, lr_0 = 1.2580e-04
Loss = 3.0865e-01, PNorm = 63.8308, GNorm = 1.8369, lr_0 = 1.2571e-04
Loss = 3.3290e-01, PNorm = 63.8332, GNorm = 1.2541, lr_0 = 1.2563e-04
Loss = 3.2170e-01, PNorm = 63.8341, GNorm = 1.3213, lr_0 = 1.2554e-04
Loss = 3.5342e-01, PNorm = 63.8339, GNorm = 1.8017, lr_0 = 1.2545e-04
Loss = 3.4586e-01, PNorm = 63.8339, GNorm = 2.0276, lr_0 = 1.2537e-04
Loss = 3.2884e-01, PNorm = 63.8364, GNorm = 1.1466, lr_0 = 1.2528e-04
Loss = 3.3917e-01, PNorm = 63.8375, GNorm = 1.5054, lr_0 = 1.2520e-04
Loss = 3.1616e-01, PNorm = 63.8400, GNorm = 1.3613, lr_0 = 1.2511e-04
Loss = 3.3806e-01, PNorm = 63.8439, GNorm = 1.4517, lr_0 = 1.2502e-04
Loss = 3.5995e-01, PNorm = 63.8441, GNorm = 1.8932, lr_0 = 1.2494e-04
Loss = 4.0488e-01, PNorm = 63.8438, GNorm = 1.2868, lr_0 = 1.2485e-04
Loss = 3.6315e-01, PNorm = 63.8449, GNorm = 1.4224, lr_0 = 1.2477e-04
Loss = 4.1670e-01, PNorm = 63.8451, GNorm = 1.7470, lr_0 = 1.2468e-04
Loss = 3.7944e-01, PNorm = 63.8444, GNorm = 1.5815, lr_0 = 1.2460e-04
Loss = 3.5162e-01, PNorm = 63.8455, GNorm = 1.4996, lr_0 = 1.2451e-04
Loss = 3.3969e-01, PNorm = 63.8463, GNorm = 1.9987, lr_0 = 1.2443e-04
Loss = 3.3665e-01, PNorm = 63.8470, GNorm = 1.4695, lr_0 = 1.2434e-04
Loss = 3.6406e-01, PNorm = 63.8469, GNorm = 1.9313, lr_0 = 1.2426e-04
Loss = 3.3261e-01, PNorm = 63.8488, GNorm = 2.0001, lr_0 = 1.2417e-04
Loss = 2.9433e-01, PNorm = 63.8509, GNorm = 1.2585, lr_0 = 1.2409e-04
Loss = 3.8297e-01, PNorm = 63.8513, GNorm = 1.2124, lr_0 = 1.2400e-04
Loss = 3.3882e-01, PNorm = 63.8521, GNorm = 1.4344, lr_0 = 1.2392e-04
Loss = 3.5711e-01, PNorm = 63.8517, GNorm = 1.7730, lr_0 = 1.2383e-04
Loss = 4.1874e-01, PNorm = 63.8518, GNorm = 1.4893, lr_0 = 1.2375e-04
Loss = 3.4084e-01, PNorm = 63.8530, GNorm = 1.2361, lr_0 = 1.2366e-04
Loss = 3.3258e-01, PNorm = 63.8566, GNorm = 1.3886, lr_0 = 1.2358e-04
Loss = 3.8301e-01, PNorm = 63.8584, GNorm = 1.3499, lr_0 = 1.2349e-04
Loss = 3.8316e-01, PNorm = 63.8590, GNorm = 1.1212, lr_0 = 1.2341e-04
Loss = 2.9831e-01, PNorm = 63.8601, GNorm = 1.3933, lr_0 = 1.2332e-04
Loss = 3.3174e-01, PNorm = 63.8605, GNorm = 1.5641, lr_0 = 1.2324e-04
Loss = 3.8837e-01, PNorm = 63.8595, GNorm = 1.3183, lr_0 = 1.2315e-04
Loss = 3.0957e-01, PNorm = 63.8589, GNorm = 1.2318, lr_0 = 1.2307e-04
Loss = 3.4082e-01, PNorm = 63.8609, GNorm = 2.0593, lr_0 = 1.2298e-04
Loss = 3.3898e-01, PNorm = 63.8619, GNorm = 1.3648, lr_0 = 1.2290e-04
Loss = 3.6133e-01, PNorm = 63.8626, GNorm = 1.6783, lr_0 = 1.2282e-04
Loss = 3.9103e-01, PNorm = 63.8633, GNorm = 1.5001, lr_0 = 1.2273e-04
Loss = 3.2896e-01, PNorm = 63.8635, GNorm = 1.4353, lr_0 = 1.2265e-04
Loss = 3.2642e-01, PNorm = 63.8643, GNorm = 1.5460, lr_0 = 1.2256e-04
Loss = 3.4782e-01, PNorm = 63.8656, GNorm = 1.9217, lr_0 = 1.2248e-04
Loss = 3.2484e-01, PNorm = 63.8667, GNorm = 1.5326, lr_0 = 1.2240e-04
Loss = 3.6815e-01, PNorm = 63.8671, GNorm = 1.5257, lr_0 = 1.2231e-04
Loss = 3.6601e-01, PNorm = 63.8663, GNorm = 2.2424, lr_0 = 1.2223e-04
Loss = 3.4994e-01, PNorm = 63.8662, GNorm = 1.4704, lr_0 = 1.2214e-04
Loss = 3.3766e-01, PNorm = 63.8682, GNorm = 2.0589, lr_0 = 1.2206e-04
Loss = 3.3210e-01, PNorm = 63.8691, GNorm = 2.0068, lr_0 = 1.2198e-04
Loss = 3.3801e-01, PNorm = 63.8696, GNorm = 1.0296, lr_0 = 1.2189e-04
Loss = 3.4873e-01, PNorm = 63.8716, GNorm = 1.5419, lr_0 = 1.2181e-04
Loss = 3.9142e-01, PNorm = 63.8735, GNorm = 4.1467, lr_0 = 1.2173e-04
Loss = 3.1115e-01, PNorm = 63.8741, GNorm = 1.5152, lr_0 = 1.2164e-04
Loss = 3.6317e-01, PNorm = 63.8749, GNorm = 1.3726, lr_0 = 1.2156e-04
Loss = 4.0613e-01, PNorm = 63.8751, GNorm = 1.9982, lr_0 = 1.2148e-04
Loss = 3.3684e-01, PNorm = 63.8775, GNorm = 1.0591, lr_0 = 1.2139e-04
Loss = 3.4049e-01, PNorm = 63.8782, GNorm = 1.1470, lr_0 = 1.2131e-04
Loss = 3.7910e-01, PNorm = 63.8795, GNorm = 1.8173, lr_0 = 1.2123e-04
Loss = 3.8535e-01, PNorm = 63.8803, GNorm = 2.5500, lr_0 = 1.2114e-04
Loss = 3.4374e-01, PNorm = 63.8826, GNorm = 1.8040, lr_0 = 1.2106e-04
Loss = 3.3888e-01, PNorm = 63.8827, GNorm = 1.5562, lr_0 = 1.2098e-04
Loss = 3.8031e-01, PNorm = 63.8833, GNorm = 1.3098, lr_0 = 1.2090e-04
Loss = 3.5247e-01, PNorm = 63.8858, GNorm = 1.5325, lr_0 = 1.2081e-04
Loss = 3.5566e-01, PNorm = 63.8892, GNorm = 1.3120, lr_0 = 1.2073e-04
Loss = 3.7858e-01, PNorm = 63.8880, GNorm = 1.2007, lr_0 = 1.2065e-04
Loss = 3.5925e-01, PNorm = 63.8874, GNorm = 1.6362, lr_0 = 1.2056e-04
Loss = 3.8645e-01, PNorm = 63.8887, GNorm = 1.7360, lr_0 = 1.2048e-04
Loss = 3.4948e-01, PNorm = 63.8888, GNorm = 1.1212, lr_0 = 1.2040e-04
Loss = 3.1467e-01, PNorm = 63.8893, GNorm = 1.4439, lr_0 = 1.2032e-04
Loss = 3.0858e-01, PNorm = 63.8904, GNorm = 1.7680, lr_0 = 1.2023e-04
Loss = 3.1031e-01, PNorm = 63.8921, GNorm = 1.8415, lr_0 = 1.2015e-04
Loss = 3.0775e-01, PNorm = 63.8943, GNorm = 1.5005, lr_0 = 1.2007e-04
Loss = 3.7532e-01, PNorm = 63.8955, GNorm = 1.7869, lr_0 = 1.1999e-04
Loss = 3.1972e-01, PNorm = 63.8973, GNorm = 1.1106, lr_0 = 1.1991e-04
Loss = 3.5209e-01, PNorm = 63.8984, GNorm = 1.3537, lr_0 = 1.1982e-04
Loss = 3.5811e-01, PNorm = 63.8999, GNorm = 1.3231, lr_0 = 1.1974e-04
Loss = 3.2938e-01, PNorm = 63.8992, GNorm = 1.2924, lr_0 = 1.1966e-04
Loss = 3.5626e-01, PNorm = 63.8984, GNorm = 1.4171, lr_0 = 1.1958e-04
Loss = 3.3554e-01, PNorm = 63.8989, GNorm = 1.5878, lr_0 = 1.1950e-04
Loss = 3.7222e-01, PNorm = 63.9006, GNorm = 1.3346, lr_0 = 1.1941e-04
Loss = 3.1726e-01, PNorm = 63.9010, GNorm = 1.4869, lr_0 = 1.1933e-04
Loss = 3.4969e-01, PNorm = 63.9010, GNorm = 1.3077, lr_0 = 1.1925e-04
Loss = 3.3117e-01, PNorm = 63.9034, GNorm = 1.5501, lr_0 = 1.1917e-04
Loss = 3.4339e-01, PNorm = 63.9063, GNorm = 1.7006, lr_0 = 1.1909e-04
Loss = 3.0574e-01, PNorm = 63.9068, GNorm = 1.5103, lr_0 = 1.1901e-04
Loss = 3.2447e-01, PNorm = 63.9073, GNorm = 1.6133, lr_0 = 1.1892e-04
Loss = 3.1755e-01, PNorm = 63.9083, GNorm = 1.0865, lr_0 = 1.1884e-04
Loss = 3.5001e-01, PNorm = 63.9086, GNorm = 1.4646, lr_0 = 1.1876e-04
Loss = 3.6691e-01, PNorm = 63.9083, GNorm = 1.6361, lr_0 = 1.1868e-04
Loss = 3.0447e-01, PNorm = 63.9096, GNorm = 1.2203, lr_0 = 1.1860e-04
Loss = 3.5651e-01, PNorm = 63.9098, GNorm = 2.1400, lr_0 = 1.1852e-04
Loss = 3.2821e-01, PNorm = 63.9106, GNorm = 1.0181, lr_0 = 1.1844e-04
Loss = 3.2576e-01, PNorm = 63.9110, GNorm = 1.5475, lr_0 = 1.1835e-04
Loss = 3.4635e-01, PNorm = 63.9101, GNorm = 1.9189, lr_0 = 1.1827e-04
Loss = 2.9245e-01, PNorm = 63.9100, GNorm = 1.0337, lr_0 = 1.1819e-04
Loss = 3.4327e-01, PNorm = 63.9112, GNorm = 1.4143, lr_0 = 1.1811e-04
Loss = 3.2612e-01, PNorm = 63.9119, GNorm = 1.2355, lr_0 = 1.1803e-04
Loss = 3.5925e-01, PNorm = 63.9137, GNorm = 2.0011, lr_0 = 1.1795e-04
Loss = 3.5971e-01, PNorm = 63.9153, GNorm = 1.1567, lr_0 = 1.1787e-04
Validation mae = 0.111132
Epoch 28
Loss = 3.4218e-01, PNorm = 63.9174, GNorm = 1.6126, lr_0 = 1.1779e-04
Loss = 3.5460e-01, PNorm = 63.9192, GNorm = 1.4619, lr_0 = 1.1771e-04
Loss = 2.7853e-01, PNorm = 63.9205, GNorm = 1.2590, lr_0 = 1.1763e-04
Loss = 3.4412e-01, PNorm = 63.9210, GNorm = 1.5877, lr_0 = 1.1755e-04
Loss = 3.2460e-01, PNorm = 63.9227, GNorm = 1.3779, lr_0 = 1.1747e-04
Loss = 3.4139e-01, PNorm = 63.9241, GNorm = 2.1210, lr_0 = 1.1739e-04
Loss = 3.3519e-01, PNorm = 63.9244, GNorm = 1.6457, lr_0 = 1.1730e-04
Loss = 3.6296e-01, PNorm = 63.9269, GNorm = 1.8138, lr_0 = 1.1722e-04
Loss = 3.4228e-01, PNorm = 63.9284, GNorm = 1.4153, lr_0 = 1.1714e-04
Loss = 3.1221e-01, PNorm = 63.9293, GNorm = 1.2841, lr_0 = 1.1706e-04
Loss = 3.2242e-01, PNorm = 63.9301, GNorm = 1.2394, lr_0 = 1.1698e-04
Loss = 3.3718e-01, PNorm = 63.9307, GNorm = 1.2425, lr_0 = 1.1690e-04
Loss = 3.6618e-01, PNorm = 63.9316, GNorm = 1.9117, lr_0 = 1.1682e-04
Loss = 3.6261e-01, PNorm = 63.9332, GNorm = 1.2398, lr_0 = 1.1674e-04
Loss = 3.2581e-01, PNorm = 63.9331, GNorm = 1.0389, lr_0 = 1.1666e-04
Loss = 3.3663e-01, PNorm = 63.9325, GNorm = 1.3130, lr_0 = 1.1658e-04
Loss = 3.2303e-01, PNorm = 63.9332, GNorm = 1.5173, lr_0 = 1.1650e-04
Loss = 3.4914e-01, PNorm = 63.9342, GNorm = 1.5920, lr_0 = 1.1642e-04
Loss = 3.8395e-01, PNorm = 63.9355, GNorm = 1.3893, lr_0 = 1.1634e-04
Loss = 3.3437e-01, PNorm = 63.9370, GNorm = 1.2707, lr_0 = 1.1626e-04
Loss = 3.9269e-01, PNorm = 63.9379, GNorm = 1.2832, lr_0 = 1.1618e-04
Loss = 3.1451e-01, PNorm = 63.9390, GNorm = 1.3048, lr_0 = 1.1611e-04
Loss = 3.3290e-01, PNorm = 63.9404, GNorm = 1.6311, lr_0 = 1.1603e-04
Loss = 3.7225e-01, PNorm = 63.9413, GNorm = 1.4710, lr_0 = 1.1595e-04
Loss = 3.4099e-01, PNorm = 63.9432, GNorm = 1.4772, lr_0 = 1.1587e-04
Loss = 3.4219e-01, PNorm = 63.9432, GNorm = 1.4986, lr_0 = 1.1579e-04
Loss = 3.5951e-01, PNorm = 63.9444, GNorm = 1.1721, lr_0 = 1.1571e-04
Loss = 3.4252e-01, PNorm = 63.9469, GNorm = 1.2740, lr_0 = 1.1563e-04
Loss = 3.1767e-01, PNorm = 63.9496, GNorm = 1.4230, lr_0 = 1.1555e-04
Loss = 2.9913e-01, PNorm = 63.9504, GNorm = 1.3580, lr_0 = 1.1547e-04
Loss = 3.3302e-01, PNorm = 63.9512, GNorm = 1.7062, lr_0 = 1.1539e-04
Loss = 3.5119e-01, PNorm = 63.9519, GNorm = 1.9686, lr_0 = 1.1531e-04
Loss = 3.3848e-01, PNorm = 63.9518, GNorm = 1.7455, lr_0 = 1.1523e-04
Loss = 3.4796e-01, PNorm = 63.9522, GNorm = 1.3615, lr_0 = 1.1515e-04
Loss = 3.0140e-01, PNorm = 63.9528, GNorm = 1.7478, lr_0 = 1.1508e-04
Loss = 3.3171e-01, PNorm = 63.9527, GNorm = 2.2445, lr_0 = 1.1500e-04
Loss = 3.4140e-01, PNorm = 63.9532, GNorm = 1.3690, lr_0 = 1.1492e-04
Loss = 3.1224e-01, PNorm = 63.9546, GNorm = 1.0962, lr_0 = 1.1484e-04
Loss = 3.2579e-01, PNorm = 63.9551, GNorm = 1.4424, lr_0 = 1.1476e-04
Loss = 3.4404e-01, PNorm = 63.9561, GNorm = 1.8532, lr_0 = 1.1468e-04
Loss = 3.4496e-01, PNorm = 63.9582, GNorm = 1.6567, lr_0 = 1.1460e-04
Loss = 3.1054e-01, PNorm = 63.9596, GNorm = 1.3850, lr_0 = 1.1452e-04
Loss = 3.9043e-01, PNorm = 63.9586, GNorm = 1.5199, lr_0 = 1.1445e-04
Loss = 4.0124e-01, PNorm = 63.9591, GNorm = 1.5244, lr_0 = 1.1437e-04
Loss = 3.2159e-01, PNorm = 63.9619, GNorm = 1.4666, lr_0 = 1.1429e-04
Loss = 3.3853e-01, PNorm = 63.9633, GNorm = 1.2837, lr_0 = 1.1421e-04
Loss = 3.6730e-01, PNorm = 63.9635, GNorm = 1.1375, lr_0 = 1.1413e-04
Loss = 3.4167e-01, PNorm = 63.9652, GNorm = 1.3358, lr_0 = 1.1405e-04
Loss = 3.4752e-01, PNorm = 63.9665, GNorm = 1.7257, lr_0 = 1.1398e-04
Loss = 3.5521e-01, PNorm = 63.9673, GNorm = 1.4319, lr_0 = 1.1390e-04
Loss = 3.7514e-01, PNorm = 63.9684, GNorm = 1.3117, lr_0 = 1.1382e-04
Loss = 3.2286e-01, PNorm = 63.9691, GNorm = 1.5866, lr_0 = 1.1374e-04
Loss = 3.4305e-01, PNorm = 63.9691, GNorm = 1.2018, lr_0 = 1.1366e-04
Loss = 3.2884e-01, PNorm = 63.9698, GNorm = 1.9239, lr_0 = 1.1359e-04
Loss = 3.4596e-01, PNorm = 63.9718, GNorm = 1.8180, lr_0 = 1.1351e-04
Loss = 3.3052e-01, PNorm = 63.9747, GNorm = 1.8731, lr_0 = 1.1343e-04
Loss = 3.2655e-01, PNorm = 63.9769, GNorm = 1.0959, lr_0 = 1.1335e-04
Loss = 3.5999e-01, PNorm = 63.9787, GNorm = 1.6825, lr_0 = 1.1328e-04
Loss = 3.3937e-01, PNorm = 63.9783, GNorm = 1.6469, lr_0 = 1.1320e-04
Loss = 3.5902e-01, PNorm = 63.9787, GNorm = 1.7511, lr_0 = 1.1312e-04
Loss = 3.2770e-01, PNorm = 63.9787, GNorm = 2.2198, lr_0 = 1.1304e-04
Loss = 3.3563e-01, PNorm = 63.9795, GNorm = 2.4334, lr_0 = 1.1297e-04
Loss = 2.9895e-01, PNorm = 63.9801, GNorm = 1.7787, lr_0 = 1.1289e-04
Loss = 3.5619e-01, PNorm = 63.9796, GNorm = 1.8311, lr_0 = 1.1281e-04
Loss = 3.8005e-01, PNorm = 63.9787, GNorm = 1.6137, lr_0 = 1.1273e-04
Loss = 2.9606e-01, PNorm = 63.9801, GNorm = 1.2132, lr_0 = 1.1266e-04
Loss = 3.1792e-01, PNorm = 63.9810, GNorm = 1.6756, lr_0 = 1.1258e-04
Loss = 3.3677e-01, PNorm = 63.9816, GNorm = 1.1373, lr_0 = 1.1250e-04
Loss = 3.9446e-01, PNorm = 63.9828, GNorm = 1.9111, lr_0 = 1.1243e-04
Loss = 3.0834e-01, PNorm = 63.9839, GNorm = 1.3258, lr_0 = 1.1235e-04
Loss = 3.8310e-01, PNorm = 63.9852, GNorm = 1.3148, lr_0 = 1.1227e-04
Loss = 3.5927e-01, PNorm = 63.9859, GNorm = 1.6828, lr_0 = 1.1219e-04
Loss = 3.3441e-01, PNorm = 63.9869, GNorm = 1.9669, lr_0 = 1.1212e-04
Loss = 3.1637e-01, PNorm = 63.9880, GNorm = 1.4428, lr_0 = 1.1204e-04
Loss = 3.2874e-01, PNorm = 63.9902, GNorm = 1.1813, lr_0 = 1.1196e-04
Loss = 3.5358e-01, PNorm = 63.9923, GNorm = 1.1138, lr_0 = 1.1189e-04
Loss = 3.6581e-01, PNorm = 63.9943, GNorm = 1.7605, lr_0 = 1.1181e-04
Loss = 3.6269e-01, PNorm = 63.9948, GNorm = 1.8170, lr_0 = 1.1173e-04
Loss = 3.7077e-01, PNorm = 63.9958, GNorm = 1.6034, lr_0 = 1.1166e-04
Loss = 2.9939e-01, PNorm = 63.9972, GNorm = 1.1369, lr_0 = 1.1158e-04
Loss = 3.9317e-01, PNorm = 63.9984, GNorm = 1.4141, lr_0 = 1.1150e-04
Loss = 3.3498e-01, PNorm = 63.9982, GNorm = 1.3770, lr_0 = 1.1143e-04
Loss = 3.2574e-01, PNorm = 64.0002, GNorm = 1.3951, lr_0 = 1.1135e-04
Loss = 3.7805e-01, PNorm = 64.0006, GNorm = 2.2097, lr_0 = 1.1128e-04
Loss = 3.2388e-01, PNorm = 64.0015, GNorm = 1.5469, lr_0 = 1.1120e-04
Loss = 3.3066e-01, PNorm = 64.0014, GNorm = 1.2226, lr_0 = 1.1112e-04
Loss = 4.1308e-01, PNorm = 64.0025, GNorm = 1.3580, lr_0 = 1.1105e-04
Loss = 3.5468e-01, PNorm = 64.0042, GNorm = 1.5668, lr_0 = 1.1097e-04
Loss = 3.1239e-01, PNorm = 64.0056, GNorm = 1.5801, lr_0 = 1.1089e-04
Loss = 3.5018e-01, PNorm = 64.0055, GNorm = 1.8563, lr_0 = 1.1082e-04
Loss = 3.6360e-01, PNorm = 64.0059, GNorm = 1.3068, lr_0 = 1.1074e-04
Loss = 3.3025e-01, PNorm = 64.0076, GNorm = 1.5377, lr_0 = 1.1067e-04
Loss = 3.0246e-01, PNorm = 64.0095, GNorm = 1.3350, lr_0 = 1.1059e-04
Loss = 3.4017e-01, PNorm = 64.0100, GNorm = 1.4261, lr_0 = 1.1052e-04
Loss = 3.3642e-01, PNorm = 64.0098, GNorm = 1.4921, lr_0 = 1.1044e-04
Loss = 3.2565e-01, PNorm = 64.0113, GNorm = 1.2775, lr_0 = 1.1036e-04
Loss = 3.0479e-01, PNorm = 64.0119, GNorm = 1.4503, lr_0 = 1.1029e-04
Loss = 3.2220e-01, PNorm = 64.0117, GNorm = 1.6964, lr_0 = 1.1021e-04
Loss = 3.8107e-01, PNorm = 64.0124, GNorm = 2.1914, lr_0 = 1.1014e-04
Loss = 3.3694e-01, PNorm = 64.0135, GNorm = 1.7881, lr_0 = 1.1006e-04
Loss = 4.0789e-01, PNorm = 64.0129, GNorm = 1.3869, lr_0 = 1.0999e-04
Loss = 3.8297e-01, PNorm = 64.0136, GNorm = 1.4821, lr_0 = 1.0991e-04
Loss = 3.3396e-01, PNorm = 64.0151, GNorm = 1.7591, lr_0 = 1.0984e-04
Loss = 3.6764e-01, PNorm = 64.0175, GNorm = 1.4526, lr_0 = 1.0976e-04
Loss = 3.2494e-01, PNorm = 64.0195, GNorm = 1.7979, lr_0 = 1.0969e-04
Loss = 3.6065e-01, PNorm = 64.0214, GNorm = 1.5798, lr_0 = 1.0961e-04
Loss = 3.5213e-01, PNorm = 64.0228, GNorm = 1.6126, lr_0 = 1.0954e-04
Loss = 3.4295e-01, PNorm = 64.0259, GNorm = 1.1001, lr_0 = 1.0946e-04
Loss = 3.4357e-01, PNorm = 64.0267, GNorm = 1.8606, lr_0 = 1.0939e-04
Loss = 3.9969e-01, PNorm = 64.0270, GNorm = 1.5809, lr_0 = 1.0931e-04
Loss = 3.3497e-01, PNorm = 64.0295, GNorm = 1.1554, lr_0 = 1.0924e-04
Loss = 3.8514e-01, PNorm = 64.0326, GNorm = 1.5968, lr_0 = 1.0916e-04
Loss = 3.6631e-01, PNorm = 64.0338, GNorm = 1.7982, lr_0 = 1.0909e-04
Loss = 3.2053e-01, PNorm = 64.0363, GNorm = 1.3820, lr_0 = 1.0901e-04
Loss = 3.9197e-01, PNorm = 64.0378, GNorm = 1.9019, lr_0 = 1.0894e-04
Loss = 3.6163e-01, PNorm = 64.0366, GNorm = 2.0418, lr_0 = 1.0886e-04
Loss = 3.6713e-01, PNorm = 64.0367, GNorm = 1.1905, lr_0 = 1.0879e-04
Loss = 3.4355e-01, PNorm = 64.0372, GNorm = 1.5431, lr_0 = 1.0871e-04
Loss = 3.2475e-01, PNorm = 64.0383, GNorm = 1.7016, lr_0 = 1.0864e-04
Loss = 3.0418e-01, PNorm = 64.0402, GNorm = 1.3310, lr_0 = 1.0856e-04
Validation mae = 0.111382
Epoch 29
Loss = 3.1585e-01, PNorm = 64.0413, GNorm = 1.5477, lr_0 = 1.0849e-04
Loss = 3.3766e-01, PNorm = 64.0422, GNorm = 1.3514, lr_0 = 1.0841e-04
Loss = 3.3377e-01, PNorm = 64.0442, GNorm = 2.2324, lr_0 = 1.0834e-04
Loss = 3.2178e-01, PNorm = 64.0465, GNorm = 1.4059, lr_0 = 1.0827e-04
Loss = 3.1750e-01, PNorm = 64.0468, GNorm = 1.5039, lr_0 = 1.0819e-04
Loss = 3.6816e-01, PNorm = 64.0474, GNorm = 1.7823, lr_0 = 1.0812e-04
Loss = 3.5476e-01, PNorm = 64.0504, GNorm = 1.4043, lr_0 = 1.0804e-04
Loss = 3.5893e-01, PNorm = 64.0509, GNorm = 1.6508, lr_0 = 1.0797e-04
Loss = 3.1832e-01, PNorm = 64.0506, GNorm = 1.1885, lr_0 = 1.0790e-04
Loss = 3.8840e-01, PNorm = 64.0520, GNorm = 2.1292, lr_0 = 1.0782e-04
Loss = 3.4070e-01, PNorm = 64.0531, GNorm = 1.0356, lr_0 = 1.0775e-04
Loss = 3.2510e-01, PNorm = 64.0538, GNorm = 1.3297, lr_0 = 1.0767e-04
Loss = 2.9655e-01, PNorm = 64.0542, GNorm = 1.1433, lr_0 = 1.0760e-04
Loss = 3.1594e-01, PNorm = 64.0568, GNorm = 1.5040, lr_0 = 1.0753e-04
Loss = 3.2218e-01, PNorm = 64.0593, GNorm = 1.1951, lr_0 = 1.0745e-04
Loss = 3.3769e-01, PNorm = 64.0606, GNorm = 1.7257, lr_0 = 1.0738e-04
Loss = 3.7793e-01, PNorm = 64.0611, GNorm = 1.8366, lr_0 = 1.0731e-04
Loss = 3.5190e-01, PNorm = 64.0625, GNorm = 1.5510, lr_0 = 1.0723e-04
Loss = 3.3492e-01, PNorm = 64.0631, GNorm = 1.4232, lr_0 = 1.0716e-04
Loss = 3.6827e-01, PNorm = 64.0634, GNorm = 1.2105, lr_0 = 1.0709e-04
Loss = 3.1233e-01, PNorm = 64.0646, GNorm = 1.3914, lr_0 = 1.0701e-04
Loss = 3.8930e-01, PNorm = 64.0654, GNorm = 1.2679, lr_0 = 1.0694e-04
Loss = 3.4344e-01, PNorm = 64.0675, GNorm = 1.3475, lr_0 = 1.0687e-04
Loss = 2.9310e-01, PNorm = 64.0678, GNorm = 1.3169, lr_0 = 1.0679e-04
Loss = 3.7422e-01, PNorm = 64.0666, GNorm = 1.6670, lr_0 = 1.0672e-04
Loss = 3.2314e-01, PNorm = 64.0675, GNorm = 1.8619, lr_0 = 1.0665e-04
Loss = 2.9943e-01, PNorm = 64.0696, GNorm = 1.2373, lr_0 = 1.0657e-04
Loss = 3.1877e-01, PNorm = 64.0706, GNorm = 1.8997, lr_0 = 1.0650e-04
Loss = 3.2087e-01, PNorm = 64.0711, GNorm = 1.7127, lr_0 = 1.0643e-04
Loss = 3.6675e-01, PNorm = 64.0723, GNorm = 2.3197, lr_0 = 1.0635e-04
Loss = 3.4329e-01, PNorm = 64.0741, GNorm = 1.9892, lr_0 = 1.0628e-04
Loss = 3.6762e-01, PNorm = 64.0744, GNorm = 1.7777, lr_0 = 1.0621e-04
Loss = 3.3087e-01, PNorm = 64.0741, GNorm = 1.3969, lr_0 = 1.0614e-04
Loss = 3.5287e-01, PNorm = 64.0750, GNorm = 1.2729, lr_0 = 1.0606e-04
Loss = 3.1485e-01, PNorm = 64.0763, GNorm = 1.4470, lr_0 = 1.0599e-04
Loss = 3.3496e-01, PNorm = 64.0777, GNorm = 1.2741, lr_0 = 1.0592e-04
Loss = 3.7060e-01, PNorm = 64.0796, GNorm = 1.5928, lr_0 = 1.0585e-04
Loss = 2.8071e-01, PNorm = 64.0814, GNorm = 1.2727, lr_0 = 1.0577e-04
Loss = 3.0897e-01, PNorm = 64.0814, GNorm = 1.7857, lr_0 = 1.0570e-04
Loss = 3.1667e-01, PNorm = 64.0815, GNorm = 1.9183, lr_0 = 1.0563e-04
Loss = 3.4738e-01, PNorm = 64.0823, GNorm = 1.9149, lr_0 = 1.0556e-04
Loss = 3.3410e-01, PNorm = 64.0836, GNorm = 1.4325, lr_0 = 1.0548e-04
Loss = 3.4048e-01, PNorm = 64.0852, GNorm = 1.4385, lr_0 = 1.0541e-04
Loss = 3.2653e-01, PNorm = 64.0863, GNorm = 1.6939, lr_0 = 1.0534e-04
Loss = 3.1842e-01, PNorm = 64.0854, GNorm = 1.8002, lr_0 = 1.0527e-04
Loss = 3.2723e-01, PNorm = 64.0850, GNorm = 1.8830, lr_0 = 1.0519e-04
Loss = 3.4459e-01, PNorm = 64.0849, GNorm = 1.7707, lr_0 = 1.0512e-04
Loss = 3.3309e-01, PNorm = 64.0860, GNorm = 1.9190, lr_0 = 1.0505e-04
Loss = 3.7697e-01, PNorm = 64.0862, GNorm = 2.2585, lr_0 = 1.0498e-04
Loss = 3.4320e-01, PNorm = 64.0860, GNorm = 1.4056, lr_0 = 1.0491e-04
Loss = 3.1678e-01, PNorm = 64.0868, GNorm = 1.0661, lr_0 = 1.0483e-04
Loss = 3.2784e-01, PNorm = 64.0875, GNorm = 1.2370, lr_0 = 1.0476e-04
Loss = 3.8239e-01, PNorm = 64.0892, GNorm = 1.5870, lr_0 = 1.0469e-04
Loss = 3.3135e-01, PNorm = 64.0908, GNorm = 1.6451, lr_0 = 1.0462e-04
Loss = 3.3133e-01, PNorm = 64.0929, GNorm = 2.2556, lr_0 = 1.0455e-04
Loss = 3.4111e-01, PNorm = 64.0943, GNorm = 1.3743, lr_0 = 1.0448e-04
Loss = 3.3174e-01, PNorm = 64.0966, GNorm = 1.5796, lr_0 = 1.0440e-04
Loss = 3.8600e-01, PNorm = 64.0984, GNorm = 2.1047, lr_0 = 1.0433e-04
Loss = 2.8301e-01, PNorm = 64.0987, GNorm = 1.2837, lr_0 = 1.0426e-04
Loss = 3.4870e-01, PNorm = 64.0994, GNorm = 1.0226, lr_0 = 1.0419e-04
Loss = 3.4680e-01, PNorm = 64.1001, GNorm = 1.7446, lr_0 = 1.0412e-04
Loss = 4.0252e-01, PNorm = 64.0990, GNorm = 1.5468, lr_0 = 1.0405e-04
Loss = 3.4916e-01, PNorm = 64.0993, GNorm = 1.7869, lr_0 = 1.0398e-04
Loss = 3.4208e-01, PNorm = 64.0990, GNorm = 1.4386, lr_0 = 1.0391e-04
Loss = 3.5102e-01, PNorm = 64.0996, GNorm = 1.0930, lr_0 = 1.0383e-04
Loss = 3.8962e-01, PNorm = 64.0999, GNorm = 1.5501, lr_0 = 1.0376e-04
Loss = 3.2585e-01, PNorm = 64.1011, GNorm = 1.3586, lr_0 = 1.0369e-04
Loss = 3.5726e-01, PNorm = 64.1026, GNorm = 2.0429, lr_0 = 1.0362e-04
Loss = 3.2876e-01, PNorm = 64.1052, GNorm = 2.0018, lr_0 = 1.0355e-04
Loss = 3.4761e-01, PNorm = 64.1074, GNorm = 1.2331, lr_0 = 1.0348e-04
Loss = 3.0562e-01, PNorm = 64.1089, GNorm = 1.9695, lr_0 = 1.0341e-04
Loss = 3.4649e-01, PNorm = 64.1115, GNorm = 1.7045, lr_0 = 1.0334e-04
Loss = 3.0338e-01, PNorm = 64.1133, GNorm = 1.3034, lr_0 = 1.0327e-04
Loss = 3.0643e-01, PNorm = 64.1154, GNorm = 1.6595, lr_0 = 1.0320e-04
Loss = 3.1766e-01, PNorm = 64.1154, GNorm = 1.4589, lr_0 = 1.0312e-04
Loss = 3.3174e-01, PNorm = 64.1147, GNorm = 1.4017, lr_0 = 1.0305e-04
Loss = 3.4780e-01, PNorm = 64.1141, GNorm = 1.8631, lr_0 = 1.0298e-04
Loss = 3.1902e-01, PNorm = 64.1135, GNorm = 1.3354, lr_0 = 1.0291e-04
Loss = 3.2776e-01, PNorm = 64.1134, GNorm = 1.0381, lr_0 = 1.0284e-04
Loss = 3.3540e-01, PNorm = 64.1145, GNorm = 1.7182, lr_0 = 1.0277e-04
Loss = 3.2466e-01, PNorm = 64.1147, GNorm = 1.6328, lr_0 = 1.0270e-04
Loss = 3.4385e-01, PNorm = 64.1148, GNorm = 1.7808, lr_0 = 1.0263e-04
Loss = 3.7327e-01, PNorm = 64.1151, GNorm = 1.8662, lr_0 = 1.0256e-04
Loss = 3.1654e-01, PNorm = 64.1160, GNorm = 1.8684, lr_0 = 1.0249e-04
Loss = 3.4883e-01, PNorm = 64.1162, GNorm = 1.0727, lr_0 = 1.0242e-04
Loss = 3.6639e-01, PNorm = 64.1158, GNorm = 1.4496, lr_0 = 1.0235e-04
Loss = 3.4152e-01, PNorm = 64.1173, GNorm = 1.3688, lr_0 = 1.0228e-04
Loss = 3.8152e-01, PNorm = 64.1185, GNorm = 1.7571, lr_0 = 1.0221e-04
Loss = 3.3602e-01, PNorm = 64.1187, GNorm = 1.5488, lr_0 = 1.0214e-04
Loss = 2.9274e-01, PNorm = 64.1198, GNorm = 1.1385, lr_0 = 1.0207e-04
Loss = 3.3736e-01, PNorm = 64.1218, GNorm = 1.3365, lr_0 = 1.0200e-04
Loss = 2.9656e-01, PNorm = 64.1230, GNorm = 1.1573, lr_0 = 1.0193e-04
Loss = 3.3432e-01, PNorm = 64.1235, GNorm = 1.5910, lr_0 = 1.0186e-04
Loss = 3.6520e-01, PNorm = 64.1241, GNorm = 1.1002, lr_0 = 1.0179e-04
Loss = 3.6469e-01, PNorm = 64.1240, GNorm = 1.5269, lr_0 = 1.0172e-04
Loss = 3.2719e-01, PNorm = 64.1259, GNorm = 1.4446, lr_0 = 1.0165e-04
Loss = 3.3303e-01, PNorm = 64.1286, GNorm = 1.5650, lr_0 = 1.0158e-04
Loss = 2.8711e-01, PNorm = 64.1306, GNorm = 1.8976, lr_0 = 1.0151e-04
Loss = 3.8374e-01, PNorm = 64.1306, GNorm = 1.8830, lr_0 = 1.0144e-04
Loss = 3.3632e-01, PNorm = 64.1318, GNorm = 1.2400, lr_0 = 1.0137e-04
Loss = 3.3354e-01, PNorm = 64.1348, GNorm = 1.5091, lr_0 = 1.0130e-04
Loss = 3.6679e-01, PNorm = 64.1355, GNorm = 1.7051, lr_0 = 1.0123e-04
Loss = 3.3854e-01, PNorm = 64.1362, GNorm = 1.7764, lr_0 = 1.0116e-04
Loss = 3.5224e-01, PNorm = 64.1364, GNorm = 1.5994, lr_0 = 1.0110e-04
Loss = 3.2215e-01, PNorm = 64.1383, GNorm = 1.6250, lr_0 = 1.0103e-04
Loss = 3.9660e-01, PNorm = 64.1394, GNorm = 1.8269, lr_0 = 1.0096e-04
Loss = 3.0293e-01, PNorm = 64.1402, GNorm = 1.1726, lr_0 = 1.0089e-04
Loss = 3.6374e-01, PNorm = 64.1412, GNorm = 1.7686, lr_0 = 1.0082e-04
Loss = 3.4957e-01, PNorm = 64.1424, GNorm = 2.0159, lr_0 = 1.0075e-04
Loss = 3.2389e-01, PNorm = 64.1449, GNorm = 2.3365, lr_0 = 1.0068e-04
Loss = 3.5906e-01, PNorm = 64.1445, GNorm = 1.4629, lr_0 = 1.0061e-04
Loss = 3.2712e-01, PNorm = 64.1452, GNorm = 1.2302, lr_0 = 1.0054e-04
Loss = 3.5333e-01, PNorm = 64.1465, GNorm = 1.7080, lr_0 = 1.0047e-04
Loss = 3.9755e-01, PNorm = 64.1456, GNorm = 1.5100, lr_0 = 1.0041e-04
Loss = 3.3257e-01, PNorm = 64.1450, GNorm = 1.4631, lr_0 = 1.0034e-04
Loss = 3.5454e-01, PNorm = 64.1456, GNorm = 1.4704, lr_0 = 1.0027e-04
Loss = 3.7797e-01, PNorm = 64.1465, GNorm = 1.7682, lr_0 = 1.0020e-04
Loss = 3.4181e-01, PNorm = 64.1475, GNorm = 1.5590, lr_0 = 1.0013e-04
Loss = 3.2258e-01, PNorm = 64.1485, GNorm = 1.2526, lr_0 = 1.0006e-04
Loss = 3.4480e-01, PNorm = 64.1505, GNorm = 2.2545, lr_0 = 1.0000e-04
Validation mae = 0.110671
Model 0 best validation mae = 0.110671 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110407
Ensemble test mae = 0.110407
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0783e+00, PNorm = 38.1702, GNorm = 3.6796, lr_0 = 1.0413e-04
Loss = 1.0081e+00, PNorm = 38.1702, GNorm = 3.0836, lr_0 = 1.0788e-04
Loss = 1.1548e+00, PNorm = 38.1707, GNorm = 2.6293, lr_0 = 1.1163e-04
Loss = 9.2653e-01, PNorm = 38.1711, GNorm = 3.0611, lr_0 = 1.1537e-04
Loss = 9.8219e-01, PNorm = 38.1718, GNorm = 2.3126, lr_0 = 1.1913e-04
Loss = 1.0311e+00, PNorm = 38.1723, GNorm = 3.0384, lr_0 = 1.2287e-04
Loss = 8.8286e-01, PNorm = 38.1725, GNorm = 3.8162, lr_0 = 1.2663e-04
Loss = 8.6153e-01, PNorm = 38.1731, GNorm = 2.6320, lr_0 = 1.3038e-04
Loss = 8.5558e-01, PNorm = 38.1739, GNorm = 5.7143, lr_0 = 1.3413e-04
Loss = 8.9714e-01, PNorm = 38.1754, GNorm = 3.0678, lr_0 = 1.3788e-04
Loss = 9.5373e-01, PNorm = 38.1772, GNorm = 4.1274, lr_0 = 1.4163e-04
Loss = 8.9714e-01, PNorm = 38.1786, GNorm = 1.4779, lr_0 = 1.4537e-04
Loss = 8.3355e-01, PNorm = 38.1802, GNorm = 2.9422, lr_0 = 1.4913e-04
Loss = 8.4398e-01, PNorm = 38.1823, GNorm = 3.5421, lr_0 = 1.5288e-04
Loss = 8.9191e-01, PNorm = 38.1841, GNorm = 4.8185, lr_0 = 1.5662e-04
Loss = 8.0644e-01, PNorm = 38.1860, GNorm = 2.8181, lr_0 = 1.6038e-04
Loss = 7.6994e-01, PNorm = 38.1887, GNorm = 3.1653, lr_0 = 1.6412e-04
Loss = 7.4154e-01, PNorm = 38.1910, GNorm = 3.2295, lr_0 = 1.6788e-04
Loss = 8.5146e-01, PNorm = 38.1934, GNorm = 3.1943, lr_0 = 1.7163e-04
Loss = 7.7182e-01, PNorm = 38.1968, GNorm = 1.9906, lr_0 = 1.7538e-04
Loss = 7.2127e-01, PNorm = 38.1987, GNorm = 5.0557, lr_0 = 1.7913e-04
Loss = 7.4333e-01, PNorm = 38.2010, GNorm = 2.8002, lr_0 = 1.8288e-04
Loss = 8.5913e-01, PNorm = 38.2038, GNorm = 5.2472, lr_0 = 1.8662e-04
Loss = 7.6533e-01, PNorm = 38.2062, GNorm = 9.7958, lr_0 = 1.9038e-04
Loss = 8.6105e-01, PNorm = 38.2082, GNorm = 6.6431, lr_0 = 1.9413e-04
Loss = 6.7209e-01, PNorm = 38.2116, GNorm = 6.2694, lr_0 = 1.9788e-04
Loss = 8.2809e-01, PNorm = 38.2138, GNorm = 1.7006, lr_0 = 2.0163e-04
Loss = 8.4370e-01, PNorm = 38.2168, GNorm = 4.7110, lr_0 = 2.0537e-04
Loss = 7.6328e-01, PNorm = 38.2197, GNorm = 4.1822, lr_0 = 2.0913e-04
Loss = 7.0421e-01, PNorm = 38.2219, GNorm = 4.6278, lr_0 = 2.1288e-04
Loss = 6.7761e-01, PNorm = 38.2240, GNorm = 2.0511, lr_0 = 2.1663e-04
Loss = 8.5509e-01, PNorm = 38.2291, GNorm = 2.1953, lr_0 = 2.2038e-04
Loss = 8.6823e-01, PNorm = 38.2329, GNorm = 3.5186, lr_0 = 2.2412e-04
Loss = 6.1423e-01, PNorm = 38.2345, GNorm = 2.5958, lr_0 = 2.2787e-04
Loss = 7.5052e-01, PNorm = 38.2376, GNorm = 2.4187, lr_0 = 2.3163e-04
Loss = 6.6915e-01, PNorm = 38.2417, GNorm = 5.7191, lr_0 = 2.3538e-04
Loss = 7.6391e-01, PNorm = 38.2443, GNorm = 2.7743, lr_0 = 2.3913e-04
Loss = 7.1627e-01, PNorm = 38.2487, GNorm = 3.3073, lr_0 = 2.4288e-04
Loss = 6.6793e-01, PNorm = 38.2515, GNorm = 4.2168, lr_0 = 2.4662e-04
Loss = 6.8665e-01, PNorm = 38.2532, GNorm = 8.2147, lr_0 = 2.5038e-04
Loss = 8.2266e-01, PNorm = 38.2548, GNorm = 2.2518, lr_0 = 2.5413e-04
Loss = 7.1770e-01, PNorm = 38.2577, GNorm = 2.0475, lr_0 = 2.5788e-04
Loss = 8.1264e-01, PNorm = 38.2619, GNorm = 3.3900, lr_0 = 2.6163e-04
Loss = 6.5650e-01, PNorm = 38.2646, GNorm = 2.1717, lr_0 = 2.6537e-04
Loss = 7.2810e-01, PNorm = 38.2682, GNorm = 1.6609, lr_0 = 2.6912e-04
Loss = 6.6867e-01, PNorm = 38.2712, GNorm = 2.1574, lr_0 = 2.7288e-04
Loss = 7.1806e-01, PNorm = 38.2757, GNorm = 5.3876, lr_0 = 2.7663e-04
Loss = 7.6926e-01, PNorm = 38.2785, GNorm = 1.6267, lr_0 = 2.8038e-04
Loss = 6.2491e-01, PNorm = 38.2798, GNorm = 4.8004, lr_0 = 2.8413e-04
Loss = 6.0285e-01, PNorm = 38.2837, GNorm = 5.1253, lr_0 = 2.8787e-04
Loss = 7.2428e-01, PNorm = 38.2892, GNorm = 2.7541, lr_0 = 2.9163e-04
Loss = 7.0838e-01, PNorm = 38.2936, GNorm = 1.8766, lr_0 = 2.9538e-04
Loss = 7.3710e-01, PNorm = 38.2969, GNorm = 6.8078, lr_0 = 2.9913e-04
Loss = 7.4729e-01, PNorm = 38.3045, GNorm = 7.5502, lr_0 = 3.0288e-04
Loss = 7.3602e-01, PNorm = 38.3103, GNorm = 9.9996, lr_0 = 3.0662e-04
Loss = 6.6155e-01, PNorm = 38.3135, GNorm = 6.2211, lr_0 = 3.1037e-04
Loss = 6.8811e-01, PNorm = 38.3200, GNorm = 2.0159, lr_0 = 3.1413e-04
Loss = 6.1654e-01, PNorm = 38.3240, GNorm = 2.6735, lr_0 = 3.1788e-04
Loss = 6.3881e-01, PNorm = 38.3246, GNorm = 1.3396, lr_0 = 3.2163e-04
Loss = 6.6568e-01, PNorm = 38.3286, GNorm = 3.0684, lr_0 = 3.2538e-04
Loss = 6.4430e-01, PNorm = 38.3334, GNorm = 1.5094, lr_0 = 3.2912e-04
Loss = 6.4573e-01, PNorm = 38.3375, GNorm = 3.0523, lr_0 = 3.3288e-04
Loss = 6.8984e-01, PNorm = 38.3430, GNorm = 3.1073, lr_0 = 3.3663e-04
Loss = 6.8627e-01, PNorm = 38.3488, GNorm = 14.1767, lr_0 = 3.4038e-04
Loss = 6.8002e-01, PNorm = 38.3530, GNorm = 2.2803, lr_0 = 3.4413e-04
Loss = 6.2821e-01, PNorm = 38.3598, GNorm = 3.3138, lr_0 = 3.4787e-04
Loss = 6.8044e-01, PNorm = 38.3662, GNorm = 3.1074, lr_0 = 3.5162e-04
Loss = 7.0060e-01, PNorm = 38.3708, GNorm = 2.7336, lr_0 = 3.5538e-04
Loss = 7.4718e-01, PNorm = 38.3749, GNorm = 4.7138, lr_0 = 3.5913e-04
Loss = 6.5633e-01, PNorm = 38.3805, GNorm = 2.7803, lr_0 = 3.6288e-04
Loss = 5.8132e-01, PNorm = 38.3881, GNorm = 4.5545, lr_0 = 3.6662e-04
Loss = 6.5206e-01, PNorm = 38.3930, GNorm = 3.9937, lr_0 = 3.7037e-04
Loss = 6.3690e-01, PNorm = 38.3951, GNorm = 4.7297, lr_0 = 3.7413e-04
Loss = 6.6124e-01, PNorm = 38.3984, GNorm = 13.1175, lr_0 = 3.7788e-04
Loss = 7.6077e-01, PNorm = 38.4024, GNorm = 3.1809, lr_0 = 3.8163e-04
Loss = 7.2685e-01, PNorm = 38.4115, GNorm = 3.2756, lr_0 = 3.8537e-04
Loss = 7.6740e-01, PNorm = 38.4164, GNorm = 5.2351, lr_0 = 3.8912e-04
Loss = 6.8195e-01, PNorm = 38.4235, GNorm = 2.1714, lr_0 = 3.9287e-04
Loss = 6.1633e-01, PNorm = 38.4275, GNorm = 2.8515, lr_0 = 3.9663e-04
Loss = 6.3457e-01, PNorm = 38.4324, GNorm = 6.8614, lr_0 = 4.0038e-04
Loss = 6.6807e-01, PNorm = 38.4406, GNorm = 4.5923, lr_0 = 4.0413e-04
Loss = 7.1703e-01, PNorm = 38.4426, GNorm = 1.9111, lr_0 = 4.0787e-04
Loss = 6.8975e-01, PNorm = 38.4483, GNorm = 1.2114, lr_0 = 4.1162e-04
Loss = 6.5996e-01, PNorm = 38.4560, GNorm = 3.5186, lr_0 = 4.1537e-04
Loss = 5.9616e-01, PNorm = 38.4624, GNorm = 4.6396, lr_0 = 4.1913e-04
Loss = 5.8917e-01, PNorm = 38.4683, GNorm = 1.9007, lr_0 = 4.2288e-04
Loss = 6.2453e-01, PNorm = 38.4708, GNorm = 4.6929, lr_0 = 4.2662e-04
Loss = 6.1580e-01, PNorm = 38.4787, GNorm = 3.9271, lr_0 = 4.3037e-04
Loss = 6.9940e-01, PNorm = 38.4862, GNorm = 4.1718, lr_0 = 4.3412e-04
Loss = 6.9691e-01, PNorm = 38.4975, GNorm = 5.6712, lr_0 = 4.3788e-04
Loss = 5.6248e-01, PNorm = 38.5047, GNorm = 2.3809, lr_0 = 4.4163e-04
Loss = 6.1243e-01, PNorm = 38.5115, GNorm = 1.7975, lr_0 = 4.4538e-04
Loss = 7.1384e-01, PNorm = 38.5152, GNorm = 1.4570, lr_0 = 4.4912e-04
Loss = 6.6566e-01, PNorm = 38.5227, GNorm = 1.0001, lr_0 = 4.5287e-04
Loss = 6.8738e-01, PNorm = 38.5348, GNorm = 7.0548, lr_0 = 4.5662e-04
Loss = 6.2776e-01, PNorm = 38.5434, GNorm = 5.2511, lr_0 = 4.6038e-04
Loss = 6.3354e-01, PNorm = 38.5488, GNorm = 8.5205, lr_0 = 4.6413e-04
Loss = 6.1716e-01, PNorm = 38.5514, GNorm = 5.4797, lr_0 = 4.6787e-04
Loss = 6.1512e-01, PNorm = 38.5594, GNorm = 2.3855, lr_0 = 4.7162e-04
Loss = 6.8353e-01, PNorm = 38.5699, GNorm = 1.6756, lr_0 = 4.7537e-04
Loss = 6.9728e-01, PNorm = 38.5766, GNorm = 12.7988, lr_0 = 4.7913e-04
Loss = 5.9794e-01, PNorm = 38.5834, GNorm = 1.2847, lr_0 = 4.8288e-04
Loss = 6.5886e-01, PNorm = 38.5920, GNorm = 4.3636, lr_0 = 4.8663e-04
Loss = 6.0238e-01, PNorm = 38.5983, GNorm = 6.7061, lr_0 = 4.9038e-04
Loss = 7.7776e-01, PNorm = 38.6038, GNorm = 3.9729, lr_0 = 4.9412e-04
Loss = 7.0467e-01, PNorm = 38.6126, GNorm = 3.8911, lr_0 = 4.9788e-04
Loss = 6.4398e-01, PNorm = 38.6231, GNorm = 1.9814, lr_0 = 5.0163e-04
Loss = 6.8457e-01, PNorm = 38.6309, GNorm = 5.0070, lr_0 = 5.0538e-04
Loss = 6.1062e-01, PNorm = 38.6381, GNorm = 3.7865, lr_0 = 5.0913e-04
Loss = 7.1629e-01, PNorm = 38.6460, GNorm = 12.8216, lr_0 = 5.1287e-04
Loss = 7.1409e-01, PNorm = 38.6506, GNorm = 1.4891, lr_0 = 5.1663e-04
Loss = 6.1445e-01, PNorm = 38.6603, GNorm = 2.3644, lr_0 = 5.2038e-04
Loss = 6.7830e-01, PNorm = 38.6658, GNorm = 3.0755, lr_0 = 5.2413e-04
Loss = 6.2933e-01, PNorm = 38.6799, GNorm = 3.7501, lr_0 = 5.2788e-04
Loss = 6.6163e-01, PNorm = 38.6930, GNorm = 5.0607, lr_0 = 5.3162e-04
Loss = 6.7780e-01, PNorm = 38.7033, GNorm = 1.8320, lr_0 = 5.3538e-04
Loss = 5.9948e-01, PNorm = 38.7133, GNorm = 6.7351, lr_0 = 5.3912e-04
Loss = 6.2555e-01, PNorm = 38.7249, GNorm = 5.4505, lr_0 = 5.4288e-04
Loss = 6.3249e-01, PNorm = 38.7339, GNorm = 1.6783, lr_0 = 5.4663e-04
Loss = 5.8138e-01, PNorm = 38.7423, GNorm = 4.3492, lr_0 = 5.5038e-04
Validation mae = 0.137841
Epoch 1
Loss = 6.2000e-01, PNorm = 38.7531, GNorm = 2.9483, lr_0 = 5.5413e-04
Loss = 5.5198e-01, PNorm = 38.7621, GNorm = 5.1479, lr_0 = 5.5787e-04
Loss = 5.8967e-01, PNorm = 38.7769, GNorm = 5.8620, lr_0 = 5.6163e-04
Loss = 7.1770e-01, PNorm = 38.7860, GNorm = 3.2444, lr_0 = 5.6538e-04
Loss = 6.9880e-01, PNorm = 38.7953, GNorm = 3.3917, lr_0 = 5.6913e-04
Loss = 6.3907e-01, PNorm = 38.8047, GNorm = 2.1173, lr_0 = 5.7288e-04
Loss = 7.4701e-01, PNorm = 38.8178, GNorm = 4.6558, lr_0 = 5.7662e-04
Loss = 6.5146e-01, PNorm = 38.8282, GNorm = 5.5843, lr_0 = 5.8038e-04
Loss = 5.3455e-01, PNorm = 38.8365, GNorm = 1.6802, lr_0 = 5.8413e-04
Loss = 5.4545e-01, PNorm = 38.8447, GNorm = 1.9410, lr_0 = 5.8788e-04
Loss = 5.5908e-01, PNorm = 38.8531, GNorm = 2.8526, lr_0 = 5.9163e-04
Loss = 6.6046e-01, PNorm = 38.8618, GNorm = 2.1534, lr_0 = 5.9538e-04
Loss = 6.0855e-01, PNorm = 38.8767, GNorm = 1.0835, lr_0 = 5.9913e-04
Loss = 5.9145e-01, PNorm = 38.8878, GNorm = 2.0045, lr_0 = 6.0288e-04
Loss = 6.4457e-01, PNorm = 38.8934, GNorm = 2.1564, lr_0 = 6.0663e-04
Loss = 6.9535e-01, PNorm = 38.9033, GNorm = 2.3503, lr_0 = 6.1038e-04
Loss = 6.0275e-01, PNorm = 38.9155, GNorm = 4.1094, lr_0 = 6.1413e-04
Loss = 6.2442e-01, PNorm = 38.9241, GNorm = 2.5670, lr_0 = 6.1788e-04
Loss = 6.0986e-01, PNorm = 38.9348, GNorm = 3.4003, lr_0 = 6.2163e-04
Loss = 6.7546e-01, PNorm = 38.9449, GNorm = 8.8521, lr_0 = 6.2538e-04
Loss = 6.6905e-01, PNorm = 38.9598, GNorm = 8.1829, lr_0 = 6.2913e-04
Loss = 6.5751e-01, PNorm = 38.9710, GNorm = 1.2807, lr_0 = 6.3288e-04
Loss = 5.4483e-01, PNorm = 38.9888, GNorm = 2.3704, lr_0 = 6.3663e-04
Loss = 6.1916e-01, PNorm = 38.9985, GNorm = 1.8353, lr_0 = 6.4038e-04
Loss = 5.8462e-01, PNorm = 39.0057, GNorm = 1.0774, lr_0 = 6.4413e-04
Loss = 5.8808e-01, PNorm = 39.0187, GNorm = 3.3732, lr_0 = 6.4788e-04
Loss = 5.6730e-01, PNorm = 39.0331, GNorm = 5.2920, lr_0 = 6.5163e-04
Loss = 5.5260e-01, PNorm = 39.0430, GNorm = 1.3908, lr_0 = 6.5538e-04
Loss = 5.6626e-01, PNorm = 39.0572, GNorm = 1.3060, lr_0 = 6.5913e-04
Loss = 5.9870e-01, PNorm = 39.0717, GNorm = 3.2471, lr_0 = 6.6288e-04
Loss = 6.3538e-01, PNorm = 39.0777, GNorm = 1.7948, lr_0 = 6.6663e-04
Loss = 6.4310e-01, PNorm = 39.0894, GNorm = 5.6463, lr_0 = 6.7038e-04
Loss = 6.3501e-01, PNorm = 39.1070, GNorm = 2.3912, lr_0 = 6.7413e-04
Loss = 6.2164e-01, PNorm = 39.1168, GNorm = 7.7866, lr_0 = 6.7788e-04
Loss = 6.0164e-01, PNorm = 39.1337, GNorm = 3.9896, lr_0 = 6.8163e-04
Loss = 6.2753e-01, PNorm = 39.1468, GNorm = 4.2853, lr_0 = 6.8538e-04
Loss = 5.7185e-01, PNorm = 39.1609, GNorm = 4.2175, lr_0 = 6.8913e-04
Loss = 5.9974e-01, PNorm = 39.1813, GNorm = 1.7330, lr_0 = 6.9288e-04
Loss = 6.5150e-01, PNorm = 39.1984, GNorm = 1.9668, lr_0 = 6.9663e-04
Loss = 6.6375e-01, PNorm = 39.2137, GNorm = 2.4473, lr_0 = 7.0038e-04
Loss = 6.6102e-01, PNorm = 39.2231, GNorm = 1.8237, lr_0 = 7.0413e-04
Loss = 6.2554e-01, PNorm = 39.2414, GNorm = 2.3200, lr_0 = 7.0788e-04
Loss = 5.6224e-01, PNorm = 39.2533, GNorm = 2.2331, lr_0 = 7.1163e-04
Loss = 6.8112e-01, PNorm = 39.2722, GNorm = 2.6472, lr_0 = 7.1538e-04
Loss = 6.5257e-01, PNorm = 39.2873, GNorm = 2.2327, lr_0 = 7.1913e-04
Loss = 5.8955e-01, PNorm = 39.2985, GNorm = 1.7002, lr_0 = 7.2288e-04
Loss = 6.2259e-01, PNorm = 39.3122, GNorm = 6.4674, lr_0 = 7.2663e-04
Loss = 6.8352e-01, PNorm = 39.3210, GNorm = 6.9287, lr_0 = 7.3038e-04
Loss = 6.1984e-01, PNorm = 39.3406, GNorm = 1.7955, lr_0 = 7.3413e-04
Loss = 5.5973e-01, PNorm = 39.3610, GNorm = 3.9818, lr_0 = 7.3788e-04
Loss = 5.3369e-01, PNorm = 39.3756, GNorm = 4.7684, lr_0 = 7.4163e-04
Loss = 5.8963e-01, PNorm = 39.3816, GNorm = 1.0019, lr_0 = 7.4538e-04
Loss = 6.2500e-01, PNorm = 39.3924, GNorm = 0.9049, lr_0 = 7.4913e-04
Loss = 6.0612e-01, PNorm = 39.4091, GNorm = 4.8737, lr_0 = 7.5288e-04
Loss = 6.5251e-01, PNorm = 39.4195, GNorm = 1.3951, lr_0 = 7.5663e-04
Loss = 5.7683e-01, PNorm = 39.4401, GNorm = 1.9508, lr_0 = 7.6038e-04
Loss = 5.6714e-01, PNorm = 39.4543, GNorm = 1.6806, lr_0 = 7.6413e-04
Loss = 6.4366e-01, PNorm = 39.4706, GNorm = 4.0125, lr_0 = 7.6788e-04
Loss = 5.8310e-01, PNorm = 39.4859, GNorm = 2.3637, lr_0 = 7.7163e-04
Loss = 5.6145e-01, PNorm = 39.5070, GNorm = 6.3563, lr_0 = 7.7538e-04
Loss = 5.9641e-01, PNorm = 39.5157, GNorm = 2.9680, lr_0 = 7.7913e-04
Loss = 5.6555e-01, PNorm = 39.5318, GNorm = 1.2911, lr_0 = 7.8288e-04
Loss = 5.5795e-01, PNorm = 39.5479, GNorm = 0.9498, lr_0 = 7.8663e-04
Loss = 6.1518e-01, PNorm = 39.5645, GNorm = 3.2975, lr_0 = 7.9038e-04
Loss = 5.9465e-01, PNorm = 39.5678, GNorm = 1.5271, lr_0 = 7.9413e-04
Loss = 5.5571e-01, PNorm = 39.5869, GNorm = 2.4157, lr_0 = 7.9788e-04
Loss = 6.2119e-01, PNorm = 39.6030, GNorm = 2.6704, lr_0 = 8.0163e-04
Loss = 6.2640e-01, PNorm = 39.6231, GNorm = 2.8177, lr_0 = 8.0538e-04
Loss = 5.7611e-01, PNorm = 39.6368, GNorm = 1.1599, lr_0 = 8.0913e-04
Loss = 6.1810e-01, PNorm = 39.6574, GNorm = 3.1456, lr_0 = 8.1288e-04
Loss = 6.5705e-01, PNorm = 39.6888, GNorm = 1.3734, lr_0 = 8.1663e-04
Loss = 5.5883e-01, PNorm = 39.7095, GNorm = 1.3312, lr_0 = 8.2038e-04
Loss = 5.5758e-01, PNorm = 39.7320, GNorm = 2.9344, lr_0 = 8.2413e-04
Loss = 6.4620e-01, PNorm = 39.7520, GNorm = 2.0299, lr_0 = 8.2788e-04
Loss = 6.4026e-01, PNorm = 39.7677, GNorm = 2.6848, lr_0 = 8.3163e-04
Loss = 6.2780e-01, PNorm = 39.7930, GNorm = 1.4019, lr_0 = 8.3538e-04
Loss = 5.4024e-01, PNorm = 39.8103, GNorm = 1.0263, lr_0 = 8.3913e-04
Loss = 5.5039e-01, PNorm = 39.8339, GNorm = 4.0953, lr_0 = 8.4288e-04
Loss = 5.5069e-01, PNorm = 39.8506, GNorm = 2.2655, lr_0 = 8.4663e-04
Loss = 6.0516e-01, PNorm = 39.8772, GNorm = 1.1841, lr_0 = 8.5038e-04
Loss = 6.3416e-01, PNorm = 39.8988, GNorm = 1.0507, lr_0 = 8.5413e-04
Loss = 4.9847e-01, PNorm = 39.9122, GNorm = 2.5113, lr_0 = 8.5788e-04
Loss = 5.4779e-01, PNorm = 39.9267, GNorm = 1.6262, lr_0 = 8.6163e-04
Loss = 5.5768e-01, PNorm = 39.9502, GNorm = 1.3198, lr_0 = 8.6538e-04
Loss = 5.6721e-01, PNorm = 39.9708, GNorm = 5.3907, lr_0 = 8.6913e-04
Loss = 5.7896e-01, PNorm = 39.9841, GNorm = 4.6332, lr_0 = 8.7288e-04
Loss = 5.2115e-01, PNorm = 40.0039, GNorm = 1.9222, lr_0 = 8.7663e-04
Loss = 5.5182e-01, PNorm = 40.0158, GNorm = 6.3813, lr_0 = 8.8038e-04
Loss = 5.5483e-01, PNorm = 40.0332, GNorm = 2.7958, lr_0 = 8.8413e-04
Loss = 5.3145e-01, PNorm = 40.0447, GNorm = 4.1999, lr_0 = 8.8788e-04
Loss = 5.8882e-01, PNorm = 40.0593, GNorm = 1.2860, lr_0 = 8.9163e-04
Loss = 4.9648e-01, PNorm = 40.0852, GNorm = 1.6034, lr_0 = 8.9538e-04
Loss = 5.5970e-01, PNorm = 40.0951, GNorm = 1.3982, lr_0 = 8.9913e-04
Loss = 5.6097e-01, PNorm = 40.1138, GNorm = 3.5862, lr_0 = 9.0288e-04
Loss = 6.0023e-01, PNorm = 40.1369, GNorm = 2.4316, lr_0 = 9.0663e-04
Loss = 5.7506e-01, PNorm = 40.1632, GNorm = 1.5590, lr_0 = 9.1038e-04
Loss = 5.8379e-01, PNorm = 40.1823, GNorm = 1.1772, lr_0 = 9.1413e-04
Loss = 5.8672e-01, PNorm = 40.1969, GNorm = 2.5418, lr_0 = 9.1788e-04
Loss = 6.5467e-01, PNorm = 40.2105, GNorm = 2.7383, lr_0 = 9.2163e-04
Loss = 6.3228e-01, PNorm = 40.2273, GNorm = 6.0613, lr_0 = 9.2538e-04
Loss = 5.7273e-01, PNorm = 40.2443, GNorm = 4.4595, lr_0 = 9.2913e-04
Loss = 5.3721e-01, PNorm = 40.2704, GNorm = 1.7515, lr_0 = 9.3288e-04
Loss = 5.2822e-01, PNorm = 40.2837, GNorm = 1.7470, lr_0 = 9.3663e-04
Loss = 4.7879e-01, PNorm = 40.2984, GNorm = 1.7893, lr_0 = 9.4038e-04
Loss = 5.2692e-01, PNorm = 40.3182, GNorm = 2.4278, lr_0 = 9.4413e-04
Loss = 5.3717e-01, PNorm = 40.3413, GNorm = 3.0602, lr_0 = 9.4788e-04
Loss = 5.8212e-01, PNorm = 40.3577, GNorm = 1.3996, lr_0 = 9.5163e-04
Loss = 6.1727e-01, PNorm = 40.3663, GNorm = 1.9861, lr_0 = 9.5538e-04
Loss = 5.9261e-01, PNorm = 40.3828, GNorm = 1.1618, lr_0 = 9.5913e-04
Loss = 5.4210e-01, PNorm = 40.4025, GNorm = 0.8022, lr_0 = 9.6288e-04
Loss = 5.8445e-01, PNorm = 40.4252, GNorm = 3.1372, lr_0 = 9.6663e-04
Loss = 5.8726e-01, PNorm = 40.4411, GNorm = 3.1863, lr_0 = 9.7038e-04
Loss = 5.7453e-01, PNorm = 40.4620, GNorm = 1.3888, lr_0 = 9.7413e-04
Loss = 5.5054e-01, PNorm = 40.4897, GNorm = 1.7270, lr_0 = 9.7788e-04
Loss = 4.9896e-01, PNorm = 40.5082, GNorm = 1.2472, lr_0 = 9.8163e-04
Loss = 5.8314e-01, PNorm = 40.5324, GNorm = 2.4812, lr_0 = 9.8537e-04
Loss = 5.7034e-01, PNorm = 40.5465, GNorm = 1.3721, lr_0 = 9.8912e-04
Loss = 6.2889e-01, PNorm = 40.5683, GNorm = 2.4823, lr_0 = 9.9288e-04
Loss = 5.7140e-01, PNorm = 40.5873, GNorm = 0.9961, lr_0 = 9.9663e-04
Loss = 5.6067e-01, PNorm = 40.6083, GNorm = 1.2269, lr_0 = 9.9993e-04
Validation mae = 0.130559
Epoch 2
Loss = 6.0309e-01, PNorm = 40.6349, GNorm = 1.3327, lr_0 = 9.9925e-04
Loss = 4.8258e-01, PNorm = 40.6561, GNorm = 0.8390, lr_0 = 9.9856e-04
Loss = 4.2722e-01, PNorm = 40.6795, GNorm = 2.4511, lr_0 = 9.9788e-04
Loss = 5.4824e-01, PNorm = 40.7054, GNorm = 5.0874, lr_0 = 9.9719e-04
Loss = 5.7260e-01, PNorm = 40.7274, GNorm = 2.5894, lr_0 = 9.9651e-04
Loss = 5.9124e-01, PNorm = 40.7597, GNorm = 4.0024, lr_0 = 9.9583e-04
Loss = 5.0255e-01, PNorm = 40.7824, GNorm = 1.9151, lr_0 = 9.9515e-04
Loss = 5.5332e-01, PNorm = 40.8097, GNorm = 0.9458, lr_0 = 9.9446e-04
Loss = 5.2203e-01, PNorm = 40.8342, GNorm = 2.7610, lr_0 = 9.9378e-04
Loss = 5.3826e-01, PNorm = 40.8510, GNorm = 1.7462, lr_0 = 9.9310e-04
Loss = 5.5154e-01, PNorm = 40.8734, GNorm = 1.2136, lr_0 = 9.9242e-04
Loss = 5.0635e-01, PNorm = 40.8981, GNorm = 3.2141, lr_0 = 9.9174e-04
Loss = 6.5217e-01, PNorm = 40.9210, GNorm = 1.6309, lr_0 = 9.9106e-04
Loss = 5.3109e-01, PNorm = 40.9420, GNorm = 2.5404, lr_0 = 9.9038e-04
Loss = 5.9500e-01, PNorm = 40.9585, GNorm = 1.3616, lr_0 = 9.8971e-04
Loss = 5.6776e-01, PNorm = 40.9728, GNorm = 2.8723, lr_0 = 9.8903e-04
Loss = 5.5015e-01, PNorm = 40.9970, GNorm = 1.6395, lr_0 = 9.8835e-04
Loss = 4.7990e-01, PNorm = 41.0171, GNorm = 1.6271, lr_0 = 9.8767e-04
Loss = 4.7326e-01, PNorm = 41.0404, GNorm = 1.8286, lr_0 = 9.8700e-04
Loss = 5.1206e-01, PNorm = 41.0443, GNorm = 1.0101, lr_0 = 9.8632e-04
Loss = 5.8752e-01, PNorm = 41.0575, GNorm = 1.1473, lr_0 = 9.8564e-04
Loss = 5.5867e-01, PNorm = 41.0792, GNorm = 1.3840, lr_0 = 9.8497e-04
Loss = 5.3691e-01, PNorm = 41.1041, GNorm = 1.2240, lr_0 = 9.8429e-04
Loss = 5.8886e-01, PNorm = 41.1150, GNorm = 1.2434, lr_0 = 9.8362e-04
Loss = 5.6571e-01, PNorm = 41.1347, GNorm = 2.5447, lr_0 = 9.8295e-04
Loss = 5.4384e-01, PNorm = 41.1533, GNorm = 1.6285, lr_0 = 9.8227e-04
Loss = 4.9256e-01, PNorm = 41.1666, GNorm = 2.1765, lr_0 = 9.8160e-04
Loss = 5.2840e-01, PNorm = 41.1812, GNorm = 3.8415, lr_0 = 9.8093e-04
Loss = 5.4742e-01, PNorm = 41.2019, GNorm = 1.7942, lr_0 = 9.8026e-04
Loss = 6.0498e-01, PNorm = 41.2223, GNorm = 2.9087, lr_0 = 9.7958e-04
Loss = 5.6881e-01, PNorm = 41.2410, GNorm = 3.7951, lr_0 = 9.7891e-04
Loss = 6.0365e-01, PNorm = 41.2598, GNorm = 1.2326, lr_0 = 9.7824e-04
Loss = 5.5990e-01, PNorm = 41.2824, GNorm = 3.1455, lr_0 = 9.7757e-04
Loss = 5.7245e-01, PNorm = 41.2984, GNorm = 3.7034, lr_0 = 9.7690e-04
Loss = 5.8005e-01, PNorm = 41.3223, GNorm = 1.2583, lr_0 = 9.7623e-04
Loss = 4.4983e-01, PNorm = 41.3418, GNorm = 1.1501, lr_0 = 9.7556e-04
Loss = 5.1524e-01, PNorm = 41.3577, GNorm = 2.7006, lr_0 = 9.7490e-04
Loss = 5.7836e-01, PNorm = 41.3828, GNorm = 2.8439, lr_0 = 9.7423e-04
Loss = 6.0840e-01, PNorm = 41.3994, GNorm = 1.6191, lr_0 = 9.7356e-04
Loss = 5.7063e-01, PNorm = 41.4191, GNorm = 2.0112, lr_0 = 9.7289e-04
Loss = 5.2760e-01, PNorm = 41.4456, GNorm = 2.0739, lr_0 = 9.7223e-04
Loss = 5.2926e-01, PNorm = 41.4719, GNorm = 1.7229, lr_0 = 9.7156e-04
Loss = 4.9734e-01, PNorm = 41.4868, GNorm = 1.1347, lr_0 = 9.7090e-04
Loss = 5.0046e-01, PNorm = 41.5035, GNorm = 1.2210, lr_0 = 9.7023e-04
Loss = 5.6409e-01, PNorm = 41.5208, GNorm = 1.4059, lr_0 = 9.6957e-04
Loss = 5.8871e-01, PNorm = 41.5344, GNorm = 1.2764, lr_0 = 9.6890e-04
Loss = 5.0288e-01, PNorm = 41.5516, GNorm = 2.5194, lr_0 = 9.6824e-04
Loss = 5.4742e-01, PNorm = 41.5697, GNorm = 2.0200, lr_0 = 9.6757e-04
Loss = 5.3668e-01, PNorm = 41.5892, GNorm = 3.1007, lr_0 = 9.6691e-04
Loss = 5.4431e-01, PNorm = 41.5955, GNorm = 1.3845, lr_0 = 9.6625e-04
Loss = 4.9526e-01, PNorm = 41.6131, GNorm = 1.5051, lr_0 = 9.6559e-04
Loss = 5.7962e-01, PNorm = 41.6315, GNorm = 2.7403, lr_0 = 9.6493e-04
Loss = 5.2543e-01, PNorm = 41.6559, GNorm = 1.7988, lr_0 = 9.6427e-04
Loss = 5.5871e-01, PNorm = 41.6846, GNorm = 1.3822, lr_0 = 9.6360e-04
Loss = 5.2404e-01, PNorm = 41.7130, GNorm = 2.6296, lr_0 = 9.6294e-04
Loss = 6.2275e-01, PNorm = 41.7371, GNorm = 1.7966, lr_0 = 9.6228e-04
Loss = 4.9597e-01, PNorm = 41.7498, GNorm = 3.3930, lr_0 = 9.6163e-04
Loss = 5.0611e-01, PNorm = 41.7684, GNorm = 1.5253, lr_0 = 9.6097e-04
Loss = 6.4733e-01, PNorm = 41.7973, GNorm = 2.0179, lr_0 = 9.6031e-04
Loss = 5.3666e-01, PNorm = 41.8227, GNorm = 1.7591, lr_0 = 9.5965e-04
Loss = 5.7816e-01, PNorm = 41.8366, GNorm = 3.6548, lr_0 = 9.5899e-04
Loss = 4.7244e-01, PNorm = 41.8550, GNorm = 0.9980, lr_0 = 9.5834e-04
Loss = 6.1547e-01, PNorm = 41.8724, GNorm = 1.3047, lr_0 = 9.5768e-04
Loss = 5.9089e-01, PNorm = 41.8838, GNorm = 2.2850, lr_0 = 9.5702e-04
Loss = 6.7958e-01, PNorm = 41.9108, GNorm = 0.9148, lr_0 = 9.5637e-04
Loss = 5.4328e-01, PNorm = 41.9466, GNorm = 2.8958, lr_0 = 9.5571e-04
Loss = 5.7640e-01, PNorm = 41.9689, GNorm = 1.1404, lr_0 = 9.5506e-04
Loss = 5.8380e-01, PNorm = 41.9923, GNorm = 1.3661, lr_0 = 9.5440e-04
Loss = 5.1167e-01, PNorm = 42.0163, GNorm = 2.7968, lr_0 = 9.5375e-04
Loss = 6.4481e-01, PNorm = 42.0382, GNorm = 1.0340, lr_0 = 9.5310e-04
Loss = 7.1319e-01, PNorm = 42.0583, GNorm = 2.6454, lr_0 = 9.5244e-04
Loss = 5.4286e-01, PNorm = 42.0796, GNorm = 1.2783, lr_0 = 9.5179e-04
Loss = 5.1010e-01, PNorm = 42.1045, GNorm = 1.9528, lr_0 = 9.5114e-04
Loss = 5.6258e-01, PNorm = 42.1227, GNorm = 1.6435, lr_0 = 9.5049e-04
Loss = 5.2774e-01, PNorm = 42.1508, GNorm = 1.6855, lr_0 = 9.4984e-04
Loss = 5.2106e-01, PNorm = 42.1578, GNorm = 1.6068, lr_0 = 9.4919e-04
Loss = 6.0983e-01, PNorm = 42.1765, GNorm = 1.4223, lr_0 = 9.4854e-04
Loss = 4.4809e-01, PNorm = 42.2029, GNorm = 1.2222, lr_0 = 9.4789e-04
Loss = 5.1594e-01, PNorm = 42.2172, GNorm = 1.8755, lr_0 = 9.4724e-04
Loss = 4.2080e-01, PNorm = 42.2315, GNorm = 2.1845, lr_0 = 9.4659e-04
Loss = 4.7224e-01, PNorm = 42.2531, GNorm = 2.7280, lr_0 = 9.4594e-04
Loss = 6.0827e-01, PNorm = 42.2757, GNorm = 1.7368, lr_0 = 9.4529e-04
Loss = 5.8976e-01, PNorm = 42.3024, GNorm = 2.3982, lr_0 = 9.4464e-04
Loss = 5.1375e-01, PNorm = 42.3179, GNorm = 1.2439, lr_0 = 9.4400e-04
Loss = 5.5560e-01, PNorm = 42.3349, GNorm = 2.9959, lr_0 = 9.4335e-04
Loss = 4.5523e-01, PNorm = 42.3544, GNorm = 1.7587, lr_0 = 9.4270e-04
Loss = 5.3886e-01, PNorm = 42.3669, GNorm = 1.8654, lr_0 = 9.4206e-04
Loss = 5.3454e-01, PNorm = 42.3851, GNorm = 1.9887, lr_0 = 9.4141e-04
Loss = 5.8356e-01, PNorm = 42.4049, GNorm = 1.3403, lr_0 = 9.4077e-04
Loss = 5.4646e-01, PNorm = 42.4150, GNorm = 1.4738, lr_0 = 9.4012e-04
Loss = 5.8456e-01, PNorm = 42.4357, GNorm = 1.0889, lr_0 = 9.3948e-04
Loss = 5.3403e-01, PNorm = 42.4656, GNorm = 2.8260, lr_0 = 9.3884e-04
Loss = 5.4272e-01, PNorm = 42.4786, GNorm = 1.8252, lr_0 = 9.3819e-04
Loss = 5.1960e-01, PNorm = 42.4967, GNorm = 1.5266, lr_0 = 9.3755e-04
Loss = 4.8864e-01, PNorm = 42.5117, GNorm = 1.0626, lr_0 = 9.3691e-04
Loss = 5.2608e-01, PNorm = 42.5303, GNorm = 2.7273, lr_0 = 9.3627e-04
Loss = 5.0966e-01, PNorm = 42.5418, GNorm = 1.0084, lr_0 = 9.3562e-04
Loss = 5.4387e-01, PNorm = 42.5618, GNorm = 1.1588, lr_0 = 9.3498e-04
Loss = 5.6474e-01, PNorm = 42.5890, GNorm = 2.6398, lr_0 = 9.3434e-04
Loss = 5.7650e-01, PNorm = 42.6023, GNorm = 1.8700, lr_0 = 9.3370e-04
Loss = 5.4006e-01, PNorm = 42.6210, GNorm = 1.1570, lr_0 = 9.3306e-04
Loss = 5.1413e-01, PNorm = 42.6424, GNorm = 1.2582, lr_0 = 9.3242e-04
Loss = 5.9438e-01, PNorm = 42.6613, GNorm = 3.2823, lr_0 = 9.3178e-04
Loss = 5.3460e-01, PNorm = 42.6836, GNorm = 1.1086, lr_0 = 9.3115e-04
Loss = 4.6661e-01, PNorm = 42.7054, GNorm = 2.9240, lr_0 = 9.3051e-04
Loss = 5.0011e-01, PNorm = 42.7260, GNorm = 2.2153, lr_0 = 9.2987e-04
Loss = 5.6218e-01, PNorm = 42.7465, GNorm = 2.5927, lr_0 = 9.2923e-04
Loss = 5.4739e-01, PNorm = 42.7709, GNorm = 2.2362, lr_0 = 9.2860e-04
Loss = 4.8382e-01, PNorm = 42.7883, GNorm = 1.3281, lr_0 = 9.2796e-04
Loss = 5.4175e-01, PNorm = 42.8050, GNorm = 4.3548, lr_0 = 9.2733e-04
Loss = 5.7399e-01, PNorm = 42.8219, GNorm = 2.1508, lr_0 = 9.2669e-04
Loss = 5.2216e-01, PNorm = 42.8487, GNorm = 1.8034, lr_0 = 9.2606e-04
Loss = 5.9557e-01, PNorm = 42.8723, GNorm = 0.7752, lr_0 = 9.2542e-04
Loss = 4.9574e-01, PNorm = 42.8885, GNorm = 2.4094, lr_0 = 9.2479e-04
Loss = 4.5421e-01, PNorm = 42.9047, GNorm = 4.6297, lr_0 = 9.2415e-04
Loss = 5.4085e-01, PNorm = 42.9147, GNorm = 1.4348, lr_0 = 9.2352e-04
Loss = 5.4182e-01, PNorm = 42.9320, GNorm = 1.9698, lr_0 = 9.2289e-04
Loss = 5.4753e-01, PNorm = 42.9480, GNorm = 1.4501, lr_0 = 9.2226e-04
Loss = 5.4783e-01, PNorm = 42.9607, GNorm = 2.1453, lr_0 = 9.2162e-04
Loss = 4.8140e-01, PNorm = 42.9778, GNorm = 0.9351, lr_0 = 9.2099e-04
Validation mae = 0.128922
Epoch 3
Loss = 4.7475e-01, PNorm = 42.9968, GNorm = 1.1913, lr_0 = 9.2036e-04
Loss = 4.7344e-01, PNorm = 43.0200, GNorm = 1.1078, lr_0 = 9.1973e-04
Loss = 4.6910e-01, PNorm = 43.0457, GNorm = 1.8230, lr_0 = 9.1910e-04
Loss = 4.7038e-01, PNorm = 43.0619, GNorm = 1.3040, lr_0 = 9.1847e-04
Loss = 4.5627e-01, PNorm = 43.0824, GNorm = 2.3445, lr_0 = 9.1784e-04
Loss = 5.0221e-01, PNorm = 43.1053, GNorm = 2.0264, lr_0 = 9.1721e-04
Loss = 5.8821e-01, PNorm = 43.1270, GNorm = 1.3236, lr_0 = 9.1658e-04
Loss = 5.1286e-01, PNorm = 43.1409, GNorm = 1.3072, lr_0 = 9.1596e-04
Loss = 5.1924e-01, PNorm = 43.1579, GNorm = 2.0818, lr_0 = 9.1533e-04
Loss = 5.4287e-01, PNorm = 43.1769, GNorm = 3.7818, lr_0 = 9.1470e-04
Loss = 4.6742e-01, PNorm = 43.2009, GNorm = 1.7621, lr_0 = 9.1408e-04
Loss = 6.0400e-01, PNorm = 43.2207, GNorm = 1.1974, lr_0 = 9.1345e-04
Loss = 6.2623e-01, PNorm = 43.2371, GNorm = 1.8801, lr_0 = 9.1282e-04
Loss = 5.4193e-01, PNorm = 43.2600, GNorm = 1.6786, lr_0 = 9.1220e-04
Loss = 4.7194e-01, PNorm = 43.2857, GNorm = 1.5535, lr_0 = 9.1157e-04
Loss = 5.4460e-01, PNorm = 43.3162, GNorm = 2.1281, lr_0 = 9.1095e-04
Loss = 4.7770e-01, PNorm = 43.3359, GNorm = 4.3991, lr_0 = 9.1032e-04
Loss = 4.6528e-01, PNorm = 43.3553, GNorm = 1.3435, lr_0 = 9.0970e-04
Loss = 4.7192e-01, PNorm = 43.3755, GNorm = 1.7604, lr_0 = 9.0908e-04
Loss = 6.1386e-01, PNorm = 43.3973, GNorm = 1.2575, lr_0 = 9.0846e-04
Loss = 4.9996e-01, PNorm = 43.4176, GNorm = 4.4829, lr_0 = 9.0783e-04
Loss = 6.6742e-01, PNorm = 43.4341, GNorm = 3.4368, lr_0 = 9.0721e-04
Loss = 5.4267e-01, PNorm = 43.4580, GNorm = 1.1841, lr_0 = 9.0659e-04
Loss = 5.0480e-01, PNorm = 43.4819, GNorm = 2.4245, lr_0 = 9.0597e-04
Loss = 5.2709e-01, PNorm = 43.5011, GNorm = 2.4508, lr_0 = 9.0535e-04
Loss = 5.5395e-01, PNorm = 43.5244, GNorm = 3.9381, lr_0 = 9.0473e-04
Loss = 4.7377e-01, PNorm = 43.5410, GNorm = 1.9369, lr_0 = 9.0411e-04
Loss = 5.1142e-01, PNorm = 43.5604, GNorm = 1.3784, lr_0 = 9.0349e-04
Loss = 5.0240e-01, PNorm = 43.5725, GNorm = 1.8650, lr_0 = 9.0287e-04
Loss = 5.0797e-01, PNorm = 43.5868, GNorm = 1.0787, lr_0 = 9.0225e-04
Loss = 4.9822e-01, PNorm = 43.5975, GNorm = 1.7192, lr_0 = 9.0163e-04
Loss = 5.0237e-01, PNorm = 43.6168, GNorm = 1.2414, lr_0 = 9.0102e-04
Loss = 5.7420e-01, PNorm = 43.6427, GNorm = 3.4152, lr_0 = 9.0040e-04
Loss = 4.5574e-01, PNorm = 43.6653, GNorm = 2.3630, lr_0 = 8.9978e-04
Loss = 4.9832e-01, PNorm = 43.6900, GNorm = 1.2406, lr_0 = 8.9916e-04
Loss = 5.3439e-01, PNorm = 43.7057, GNorm = 1.7150, lr_0 = 8.9855e-04
Loss = 5.2405e-01, PNorm = 43.7305, GNorm = 1.6859, lr_0 = 8.9793e-04
Loss = 5.1694e-01, PNorm = 43.7462, GNorm = 2.1190, lr_0 = 8.9732e-04
Loss = 5.1137e-01, PNorm = 43.7647, GNorm = 1.5359, lr_0 = 8.9670e-04
Loss = 5.1000e-01, PNorm = 43.7739, GNorm = 0.9931, lr_0 = 8.9609e-04
Loss = 5.3901e-01, PNorm = 43.7858, GNorm = 1.1907, lr_0 = 8.9548e-04
Loss = 5.2925e-01, PNorm = 43.8021, GNorm = 1.1415, lr_0 = 8.9486e-04
Loss = 5.5385e-01, PNorm = 43.8151, GNorm = 1.6923, lr_0 = 8.9425e-04
Loss = 4.8801e-01, PNorm = 43.8366, GNorm = 1.5864, lr_0 = 8.9364e-04
Loss = 4.9840e-01, PNorm = 43.8587, GNorm = 1.7373, lr_0 = 8.9302e-04
Loss = 4.7394e-01, PNorm = 43.8737, GNorm = 1.1627, lr_0 = 8.9241e-04
Loss = 4.7949e-01, PNorm = 43.8843, GNorm = 1.7907, lr_0 = 8.9180e-04
Loss = 5.1602e-01, PNorm = 43.8984, GNorm = 2.0652, lr_0 = 8.9119e-04
Loss = 4.9332e-01, PNorm = 43.9109, GNorm = 1.6066, lr_0 = 8.9058e-04
Loss = 4.6947e-01, PNorm = 43.9303, GNorm = 1.8676, lr_0 = 8.8997e-04
Loss = 5.1589e-01, PNorm = 43.9491, GNorm = 2.0525, lr_0 = 8.8936e-04
Loss = 5.1487e-01, PNorm = 43.9674, GNorm = 2.1094, lr_0 = 8.8875e-04
Loss = 5.6003e-01, PNorm = 43.9871, GNorm = 1.8404, lr_0 = 8.8814e-04
Loss = 4.8834e-01, PNorm = 44.0099, GNorm = 1.6598, lr_0 = 8.8753e-04
Loss = 4.4640e-01, PNorm = 44.0240, GNorm = 1.0071, lr_0 = 8.8693e-04
Loss = 4.7192e-01, PNorm = 44.0427, GNorm = 2.0888, lr_0 = 8.8632e-04
Loss = 4.6701e-01, PNorm = 44.0618, GNorm = 1.3775, lr_0 = 8.8571e-04
Loss = 4.8110e-01, PNorm = 44.0787, GNorm = 1.1020, lr_0 = 8.8510e-04
Loss = 4.9478e-01, PNorm = 44.0950, GNorm = 1.6859, lr_0 = 8.8450e-04
Loss = 5.0526e-01, PNorm = 44.1170, GNorm = 1.7658, lr_0 = 8.8389e-04
Loss = 4.5319e-01, PNorm = 44.1399, GNorm = 1.1386, lr_0 = 8.8329e-04
Loss = 4.7344e-01, PNorm = 44.1656, GNorm = 1.4405, lr_0 = 8.8268e-04
Loss = 5.1958e-01, PNorm = 44.1845, GNorm = 1.6814, lr_0 = 8.8208e-04
Loss = 5.1166e-01, PNorm = 44.2000, GNorm = 1.1304, lr_0 = 8.8147e-04
Loss = 5.2572e-01, PNorm = 44.2189, GNorm = 2.2468, lr_0 = 8.8087e-04
Loss = 4.8925e-01, PNorm = 44.2389, GNorm = 2.8404, lr_0 = 8.8026e-04
Loss = 5.7400e-01, PNorm = 44.2599, GNorm = 2.1075, lr_0 = 8.7966e-04
Loss = 5.3776e-01, PNorm = 44.2884, GNorm = 1.6783, lr_0 = 8.7906e-04
Loss = 4.9451e-01, PNorm = 44.3215, GNorm = 2.5340, lr_0 = 8.7846e-04
Loss = 5.1962e-01, PNorm = 44.3462, GNorm = 2.4241, lr_0 = 8.7785e-04
Loss = 5.7136e-01, PNorm = 44.3677, GNorm = 4.1964, lr_0 = 8.7725e-04
Loss = 4.8955e-01, PNorm = 44.3852, GNorm = 2.4018, lr_0 = 8.7665e-04
Loss = 5.6991e-01, PNorm = 44.4133, GNorm = 1.0959, lr_0 = 8.7605e-04
Loss = 5.0920e-01, PNorm = 44.4368, GNorm = 1.1067, lr_0 = 8.7545e-04
Loss = 5.0119e-01, PNorm = 44.4466, GNorm = 0.9507, lr_0 = 8.7485e-04
Loss = 4.7311e-01, PNorm = 44.4630, GNorm = 1.7378, lr_0 = 8.7425e-04
Loss = 5.2717e-01, PNorm = 44.4835, GNorm = 1.3940, lr_0 = 8.7365e-04
Loss = 4.9301e-01, PNorm = 44.5029, GNorm = 1.9822, lr_0 = 8.7306e-04
Loss = 4.7277e-01, PNorm = 44.5156, GNorm = 1.7116, lr_0 = 8.7246e-04
Loss = 5.0671e-01, PNorm = 44.5358, GNorm = 2.6842, lr_0 = 8.7186e-04
Loss = 4.8484e-01, PNorm = 44.5534, GNorm = 1.3247, lr_0 = 8.7126e-04
Loss = 5.0467e-01, PNorm = 44.5717, GNorm = 2.0345, lr_0 = 8.7067e-04
Loss = 6.5414e-01, PNorm = 44.5869, GNorm = 1.6540, lr_0 = 8.7007e-04
Loss = 5.9181e-01, PNorm = 44.6070, GNorm = 1.9383, lr_0 = 8.6947e-04
Loss = 4.8090e-01, PNorm = 44.6407, GNorm = 0.9079, lr_0 = 8.6888e-04
Loss = 4.6215e-01, PNorm = 44.6656, GNorm = 2.4327, lr_0 = 8.6828e-04
Loss = 4.9188e-01, PNorm = 44.6819, GNorm = 2.6467, lr_0 = 8.6769e-04
Loss = 5.2072e-01, PNorm = 44.7044, GNorm = 0.7579, lr_0 = 8.6709e-04
Loss = 4.9804e-01, PNorm = 44.7266, GNorm = 1.2076, lr_0 = 8.6650e-04
Loss = 4.8030e-01, PNorm = 44.7370, GNorm = 1.3065, lr_0 = 8.6590e-04
Loss = 5.0442e-01, PNorm = 44.7611, GNorm = 2.5206, lr_0 = 8.6531e-04
Loss = 4.9503e-01, PNorm = 44.7818, GNorm = 2.6773, lr_0 = 8.6472e-04
Loss = 4.5752e-01, PNorm = 44.7952, GNorm = 1.2535, lr_0 = 8.6413e-04
Loss = 4.6904e-01, PNorm = 44.8176, GNorm = 1.1347, lr_0 = 8.6353e-04
Loss = 5.2635e-01, PNorm = 44.8395, GNorm = 1.2876, lr_0 = 8.6294e-04
Loss = 4.7692e-01, PNorm = 44.8501, GNorm = 1.5691, lr_0 = 8.6235e-04
Loss = 5.3796e-01, PNorm = 44.8653, GNorm = 1.1234, lr_0 = 8.6176e-04
Loss = 5.0922e-01, PNorm = 44.8884, GNorm = 1.0143, lr_0 = 8.6117e-04
Loss = 5.3541e-01, PNorm = 44.9018, GNorm = 1.3699, lr_0 = 8.6058e-04
Loss = 4.4430e-01, PNorm = 44.9182, GNorm = 2.0326, lr_0 = 8.5999e-04
Loss = 4.7880e-01, PNorm = 44.9406, GNorm = 3.8167, lr_0 = 8.5940e-04
Loss = 5.4009e-01, PNorm = 44.9571, GNorm = 1.3324, lr_0 = 8.5881e-04
Loss = 5.1017e-01, PNorm = 44.9821, GNorm = 2.1080, lr_0 = 8.5823e-04
Loss = 5.7744e-01, PNorm = 45.0072, GNorm = 2.8098, lr_0 = 8.5764e-04
Loss = 5.1290e-01, PNorm = 45.0379, GNorm = 1.4673, lr_0 = 8.5705e-04
Loss = 5.6748e-01, PNorm = 45.0513, GNorm = 0.9109, lr_0 = 8.5646e-04
Loss = 4.9932e-01, PNorm = 45.0735, GNorm = 0.9840, lr_0 = 8.5588e-04
Loss = 4.6661e-01, PNorm = 45.0930, GNorm = 1.1770, lr_0 = 8.5529e-04
Loss = 5.5088e-01, PNorm = 45.1192, GNorm = 1.2326, lr_0 = 8.5470e-04
Loss = 4.8607e-01, PNorm = 45.1319, GNorm = 1.2298, lr_0 = 8.5412e-04
Loss = 4.6063e-01, PNorm = 45.1458, GNorm = 2.9259, lr_0 = 8.5353e-04
Loss = 5.4980e-01, PNorm = 45.1642, GNorm = 1.9132, lr_0 = 8.5295e-04
Loss = 5.1777e-01, PNorm = 45.1869, GNorm = 2.7716, lr_0 = 8.5236e-04
Loss = 5.2235e-01, PNorm = 45.2009, GNorm = 1.0780, lr_0 = 8.5178e-04
Loss = 5.1232e-01, PNorm = 45.2292, GNorm = 1.2211, lr_0 = 8.5120e-04
Loss = 4.6891e-01, PNorm = 45.2575, GNorm = 2.2951, lr_0 = 8.5061e-04
Loss = 5.1729e-01, PNorm = 45.2713, GNorm = 3.2242, lr_0 = 8.5003e-04
Loss = 4.8333e-01, PNorm = 45.2904, GNorm = 2.1906, lr_0 = 8.4945e-04
Loss = 4.2338e-01, PNorm = 45.3151, GNorm = 1.4505, lr_0 = 8.4887e-04
Loss = 4.9779e-01, PNorm = 45.3346, GNorm = 2.4508, lr_0 = 8.4828e-04
Validation mae = 0.123982
Epoch 4
Loss = 5.8762e-01, PNorm = 45.3522, GNorm = 3.2536, lr_0 = 8.4770e-04
Loss = 4.9807e-01, PNorm = 45.3741, GNorm = 1.4045, lr_0 = 8.4712e-04
Loss = 4.3384e-01, PNorm = 45.3954, GNorm = 1.0522, lr_0 = 8.4654e-04
Loss = 5.6512e-01, PNorm = 45.4233, GNorm = 3.0405, lr_0 = 8.4596e-04
Loss = 5.2894e-01, PNorm = 45.4407, GNorm = 1.4958, lr_0 = 8.4538e-04
Loss = 4.9950e-01, PNorm = 45.4644, GNorm = 1.5927, lr_0 = 8.4480e-04
Loss = 5.6723e-01, PNorm = 45.4824, GNorm = 1.2291, lr_0 = 8.4423e-04
Loss = 5.0798e-01, PNorm = 45.5005, GNorm = 3.6625, lr_0 = 8.4365e-04
Loss = 4.9851e-01, PNorm = 45.5251, GNorm = 1.3235, lr_0 = 8.4307e-04
Loss = 4.7765e-01, PNorm = 45.5456, GNorm = 1.9574, lr_0 = 8.4249e-04
Loss = 4.8375e-01, PNorm = 45.5631, GNorm = 0.8638, lr_0 = 8.4191e-04
Loss = 5.4276e-01, PNorm = 45.5806, GNorm = 2.6388, lr_0 = 8.4134e-04
Loss = 5.1993e-01, PNorm = 45.5952, GNorm = 1.4938, lr_0 = 8.4076e-04
Loss = 4.7363e-01, PNorm = 45.6063, GNorm = 1.4723, lr_0 = 8.4019e-04
Loss = 4.6151e-01, PNorm = 45.6153, GNorm = 1.1365, lr_0 = 8.3961e-04
Loss = 4.4598e-01, PNorm = 45.6326, GNorm = 1.6318, lr_0 = 8.3903e-04
Loss = 5.0132e-01, PNorm = 45.6404, GNorm = 1.3984, lr_0 = 8.3846e-04
Loss = 5.2436e-01, PNorm = 45.6607, GNorm = 2.6473, lr_0 = 8.3789e-04
Loss = 5.2980e-01, PNorm = 45.6808, GNorm = 2.2215, lr_0 = 8.3731e-04
Loss = 5.3908e-01, PNorm = 45.7033, GNorm = 3.2983, lr_0 = 8.3674e-04
Loss = 5.5127e-01, PNorm = 45.7203, GNorm = 2.1605, lr_0 = 8.3616e-04
Loss = 5.0962e-01, PNorm = 45.7485, GNorm = 1.9364, lr_0 = 8.3559e-04
Loss = 4.9831e-01, PNorm = 45.7745, GNorm = 1.6082, lr_0 = 8.3502e-04
Loss = 4.7083e-01, PNorm = 45.7950, GNorm = 0.9653, lr_0 = 8.3445e-04
Loss = 3.9426e-01, PNorm = 45.8133, GNorm = 1.0395, lr_0 = 8.3388e-04
Loss = 5.0464e-01, PNorm = 45.8304, GNorm = 1.4939, lr_0 = 8.3330e-04
Loss = 4.9639e-01, PNorm = 45.8528, GNorm = 2.0224, lr_0 = 8.3273e-04
Loss = 4.6882e-01, PNorm = 45.8730, GNorm = 2.2779, lr_0 = 8.3216e-04
Loss = 4.5982e-01, PNorm = 45.8913, GNorm = 1.5058, lr_0 = 8.3159e-04
Loss = 4.7316e-01, PNorm = 45.9061, GNorm = 1.3298, lr_0 = 8.3102e-04
Loss = 5.2249e-01, PNorm = 45.9293, GNorm = 0.8571, lr_0 = 8.3045e-04
Loss = 5.2551e-01, PNorm = 45.9498, GNorm = 1.1913, lr_0 = 8.2988e-04
Loss = 4.8214e-01, PNorm = 45.9679, GNorm = 1.2218, lr_0 = 8.2932e-04
Loss = 4.7073e-01, PNorm = 45.9881, GNorm = 2.4195, lr_0 = 8.2875e-04
Loss = 4.1481e-01, PNorm = 46.0089, GNorm = 1.0245, lr_0 = 8.2818e-04
Loss = 4.6630e-01, PNorm = 46.0188, GNorm = 0.9068, lr_0 = 8.2761e-04
Loss = 4.8847e-01, PNorm = 46.0329, GNorm = 1.6393, lr_0 = 8.2705e-04
Loss = 4.2294e-01, PNorm = 46.0467, GNorm = 1.0103, lr_0 = 8.2648e-04
Loss = 4.8521e-01, PNorm = 46.0629, GNorm = 1.5077, lr_0 = 8.2591e-04
Loss = 4.4777e-01, PNorm = 46.0811, GNorm = 1.1989, lr_0 = 8.2535e-04
Loss = 4.4354e-01, PNorm = 46.0967, GNorm = 1.1781, lr_0 = 8.2478e-04
Loss = 4.6928e-01, PNorm = 46.1104, GNorm = 1.7859, lr_0 = 8.2422e-04
Loss = 4.6768e-01, PNorm = 46.1231, GNorm = 1.2228, lr_0 = 8.2365e-04
Loss = 4.6963e-01, PNorm = 46.1414, GNorm = 2.8775, lr_0 = 8.2309e-04
Loss = 5.0936e-01, PNorm = 46.1570, GNorm = 1.5101, lr_0 = 8.2252e-04
Loss = 4.9334e-01, PNorm = 46.1851, GNorm = 2.9598, lr_0 = 8.2196e-04
Loss = 4.5893e-01, PNorm = 46.2063, GNorm = 1.3861, lr_0 = 8.2140e-04
Loss = 5.2852e-01, PNorm = 46.2229, GNorm = 1.2102, lr_0 = 8.2084e-04
Loss = 5.1978e-01, PNorm = 46.2422, GNorm = 2.3166, lr_0 = 8.2027e-04
Loss = 4.5836e-01, PNorm = 46.2506, GNorm = 0.8343, lr_0 = 8.1971e-04
Loss = 4.8943e-01, PNorm = 46.2660, GNorm = 1.4927, lr_0 = 8.1915e-04
Loss = 4.9351e-01, PNorm = 46.2824, GNorm = 0.9554, lr_0 = 8.1859e-04
Loss = 4.1577e-01, PNorm = 46.2986, GNorm = 1.6953, lr_0 = 8.1803e-04
Loss = 4.3458e-01, PNorm = 46.3222, GNorm = 1.2586, lr_0 = 8.1747e-04
Loss = 5.2760e-01, PNorm = 46.3384, GNorm = 1.9558, lr_0 = 8.1691e-04
Loss = 5.3113e-01, PNorm = 46.3584, GNorm = 2.8583, lr_0 = 8.1635e-04
Loss = 5.4843e-01, PNorm = 46.3836, GNorm = 2.5998, lr_0 = 8.1579e-04
Loss = 5.4898e-01, PNorm = 46.4102, GNorm = 1.6170, lr_0 = 8.1523e-04
Loss = 5.5839e-01, PNorm = 46.4288, GNorm = 1.4819, lr_0 = 8.1467e-04
Loss = 4.5799e-01, PNorm = 46.4516, GNorm = 1.8122, lr_0 = 8.1411e-04
Loss = 4.4813e-01, PNorm = 46.4727, GNorm = 0.8363, lr_0 = 8.1355e-04
Loss = 5.1220e-01, PNorm = 46.4954, GNorm = 1.1757, lr_0 = 8.1300e-04
Loss = 4.5885e-01, PNorm = 46.5203, GNorm = 1.2927, lr_0 = 8.1244e-04
Loss = 4.5764e-01, PNorm = 46.5304, GNorm = 1.2987, lr_0 = 8.1188e-04
Loss = 4.2517e-01, PNorm = 46.5437, GNorm = 2.3780, lr_0 = 8.1133e-04
Loss = 4.8695e-01, PNorm = 46.5624, GNorm = 1.1073, lr_0 = 8.1077e-04
Loss = 4.2285e-01, PNorm = 46.5782, GNorm = 1.1295, lr_0 = 8.1022e-04
Loss = 4.6251e-01, PNorm = 46.5859, GNorm = 0.7859, lr_0 = 8.0966e-04
Loss = 4.3368e-01, PNorm = 46.6061, GNorm = 1.0936, lr_0 = 8.0911e-04
Loss = 5.8005e-01, PNorm = 46.6147, GNorm = 2.5000, lr_0 = 8.0855e-04
Loss = 4.4841e-01, PNorm = 46.6346, GNorm = 1.1859, lr_0 = 8.0800e-04
Loss = 4.7910e-01, PNorm = 46.6498, GNorm = 1.4356, lr_0 = 8.0745e-04
Loss = 4.5907e-01, PNorm = 46.6593, GNorm = 1.6401, lr_0 = 8.0689e-04
Loss = 4.1880e-01, PNorm = 46.6828, GNorm = 1.5602, lr_0 = 8.0634e-04
Loss = 5.0182e-01, PNorm = 46.7047, GNorm = 1.1773, lr_0 = 8.0579e-04
Loss = 4.9848e-01, PNorm = 46.7175, GNorm = 1.2303, lr_0 = 8.0523e-04
Loss = 4.9723e-01, PNorm = 46.7360, GNorm = 2.7071, lr_0 = 8.0468e-04
Loss = 4.7736e-01, PNorm = 46.7587, GNorm = 1.2815, lr_0 = 8.0413e-04
Loss = 4.8976e-01, PNorm = 46.7803, GNorm = 1.8798, lr_0 = 8.0358e-04
Loss = 5.5625e-01, PNorm = 46.8029, GNorm = 0.8416, lr_0 = 8.0303e-04
Loss = 5.1050e-01, PNorm = 46.8247, GNorm = 1.3250, lr_0 = 8.0248e-04
Loss = 4.8642e-01, PNorm = 46.8435, GNorm = 2.0499, lr_0 = 8.0193e-04
Loss = 5.2156e-01, PNorm = 46.8645, GNorm = 1.0085, lr_0 = 8.0138e-04
Loss = 4.7191e-01, PNorm = 46.8794, GNorm = 1.9011, lr_0 = 8.0083e-04
Loss = 4.5370e-01, PNorm = 46.8950, GNorm = 1.5173, lr_0 = 8.0028e-04
Loss = 4.2894e-01, PNorm = 46.9058, GNorm = 1.4088, lr_0 = 7.9974e-04
Loss = 5.1514e-01, PNorm = 46.9160, GNorm = 2.4711, lr_0 = 7.9919e-04
Loss = 4.7094e-01, PNorm = 46.9412, GNorm = 1.2559, lr_0 = 7.9864e-04
Loss = 4.2745e-01, PNorm = 46.9598, GNorm = 1.8868, lr_0 = 7.9809e-04
Loss = 4.8918e-01, PNorm = 46.9789, GNorm = 1.4117, lr_0 = 7.9755e-04
Loss = 4.9984e-01, PNorm = 47.0003, GNorm = 1.7896, lr_0 = 7.9700e-04
Loss = 5.1133e-01, PNorm = 47.0280, GNorm = 1.3826, lr_0 = 7.9645e-04
Loss = 5.1481e-01, PNorm = 47.0498, GNorm = 1.3363, lr_0 = 7.9591e-04
Loss = 4.3262e-01, PNorm = 47.0616, GNorm = 1.6562, lr_0 = 7.9536e-04
Loss = 5.5975e-01, PNorm = 47.0812, GNorm = 1.2380, lr_0 = 7.9482e-04
Loss = 5.0644e-01, PNorm = 47.1000, GNorm = 2.5897, lr_0 = 7.9427e-04
Loss = 4.5885e-01, PNorm = 47.1210, GNorm = 2.0945, lr_0 = 7.9373e-04
Loss = 4.5000e-01, PNorm = 47.1377, GNorm = 2.4355, lr_0 = 7.9319e-04
Loss = 4.5694e-01, PNorm = 47.1548, GNorm = 1.4865, lr_0 = 7.9264e-04
Loss = 5.0336e-01, PNorm = 47.1753, GNorm = 1.4110, lr_0 = 7.9210e-04
Loss = 5.1195e-01, PNorm = 47.1903, GNorm = 2.1487, lr_0 = 7.9156e-04
Loss = 5.0316e-01, PNorm = 47.2102, GNorm = 1.8320, lr_0 = 7.9101e-04
Loss = 4.9574e-01, PNorm = 47.2350, GNorm = 1.1690, lr_0 = 7.9047e-04
Loss = 5.3322e-01, PNorm = 47.2520, GNorm = 1.1379, lr_0 = 7.8993e-04
Loss = 5.3717e-01, PNorm = 47.2734, GNorm = 1.9861, lr_0 = 7.8939e-04
Loss = 4.5095e-01, PNorm = 47.2872, GNorm = 1.1902, lr_0 = 7.8885e-04
Loss = 4.0910e-01, PNorm = 47.3031, GNorm = 1.3968, lr_0 = 7.8831e-04
Loss = 4.7407e-01, PNorm = 47.3141, GNorm = 1.9203, lr_0 = 7.8777e-04
Loss = 5.2247e-01, PNorm = 47.3398, GNorm = 2.5414, lr_0 = 7.8723e-04
Loss = 4.8731e-01, PNorm = 47.3678, GNorm = 1.5344, lr_0 = 7.8669e-04
Loss = 4.4875e-01, PNorm = 47.3918, GNorm = 1.2264, lr_0 = 7.8615e-04
Loss = 5.3188e-01, PNorm = 47.4087, GNorm = 1.8943, lr_0 = 7.8561e-04
Loss = 4.5130e-01, PNorm = 47.4196, GNorm = 1.4993, lr_0 = 7.8507e-04
Loss = 4.9379e-01, PNorm = 47.4346, GNorm = 1.8613, lr_0 = 7.8454e-04
Loss = 4.7418e-01, PNorm = 47.4484, GNorm = 1.5277, lr_0 = 7.8400e-04
Loss = 5.4821e-01, PNorm = 47.4665, GNorm = 1.9118, lr_0 = 7.8346e-04
Loss = 5.0694e-01, PNorm = 47.4844, GNorm = 1.0458, lr_0 = 7.8293e-04
Loss = 5.7509e-01, PNorm = 47.4983, GNorm = 2.5713, lr_0 = 7.8239e-04
Loss = 4.2233e-01, PNorm = 47.5187, GNorm = 1.7443, lr_0 = 7.8185e-04
Loss = 4.9494e-01, PNorm = 47.5374, GNorm = 1.3845, lr_0 = 7.8132e-04
Validation mae = 0.121454
Epoch 5
Loss = 4.6504e-01, PNorm = 47.5530, GNorm = 0.9318, lr_0 = 7.8078e-04
Loss = 4.6377e-01, PNorm = 47.5727, GNorm = 1.9783, lr_0 = 7.8025e-04
Loss = 5.2813e-01, PNorm = 47.5917, GNorm = 1.1454, lr_0 = 7.7971e-04
Loss = 4.8554e-01, PNorm = 47.6156, GNorm = 2.4682, lr_0 = 7.7918e-04
Loss = 4.8764e-01, PNorm = 47.6256, GNorm = 1.4672, lr_0 = 7.7864e-04
Loss = 4.9800e-01, PNorm = 47.6424, GNorm = 3.2926, lr_0 = 7.7811e-04
Loss = 4.9034e-01, PNorm = 47.6718, GNorm = 1.4505, lr_0 = 7.7758e-04
Loss = 4.2171e-01, PNorm = 47.6953, GNorm = 1.1885, lr_0 = 7.7705e-04
Loss = 4.6219e-01, PNorm = 47.7174, GNorm = 1.1157, lr_0 = 7.7651e-04
Loss = 5.3471e-01, PNorm = 47.7353, GNorm = 1.2461, lr_0 = 7.7598e-04
Loss = 4.9802e-01, PNorm = 47.7503, GNorm = 2.2515, lr_0 = 7.7545e-04
Loss = 4.4978e-01, PNorm = 47.7533, GNorm = 1.2396, lr_0 = 7.7492e-04
Loss = 4.7071e-01, PNorm = 47.7703, GNorm = 2.2036, lr_0 = 7.7439e-04
Loss = 5.4442e-01, PNorm = 47.7940, GNorm = 1.3130, lr_0 = 7.7386e-04
Loss = 5.1369e-01, PNorm = 47.8233, GNorm = 1.6932, lr_0 = 7.7333e-04
Loss = 4.6743e-01, PNorm = 47.8432, GNorm = 1.3541, lr_0 = 7.7280e-04
Loss = 4.4460e-01, PNorm = 47.8582, GNorm = 1.8700, lr_0 = 7.7227e-04
Loss = 4.3032e-01, PNorm = 47.8755, GNorm = 0.9329, lr_0 = 7.7174e-04
Loss = 4.9622e-01, PNorm = 47.9002, GNorm = 2.7314, lr_0 = 7.7121e-04
Loss = 4.4813e-01, PNorm = 47.9129, GNorm = 1.3061, lr_0 = 7.7068e-04
Loss = 4.9019e-01, PNorm = 47.9286, GNorm = 1.9290, lr_0 = 7.7015e-04
Loss = 4.7055e-01, PNorm = 47.9516, GNorm = 3.4860, lr_0 = 7.6963e-04
Loss = 5.0123e-01, PNorm = 47.9773, GNorm = 1.7922, lr_0 = 7.6910e-04
Loss = 5.1043e-01, PNorm = 48.0015, GNorm = 1.4830, lr_0 = 7.6857e-04
Loss = 5.2198e-01, PNorm = 48.0180, GNorm = 1.7427, lr_0 = 7.6805e-04
Loss = 4.5412e-01, PNorm = 48.0336, GNorm = 2.7376, lr_0 = 7.6752e-04
Loss = 4.8350e-01, PNorm = 48.0530, GNorm = 0.9949, lr_0 = 7.6699e-04
Loss = 4.0739e-01, PNorm = 48.0741, GNorm = 1.1764, lr_0 = 7.6647e-04
Loss = 4.3083e-01, PNorm = 48.0890, GNorm = 1.4597, lr_0 = 7.6594e-04
Loss = 4.2585e-01, PNorm = 48.1053, GNorm = 2.6114, lr_0 = 7.6542e-04
Loss = 4.5172e-01, PNorm = 48.1149, GNorm = 1.8042, lr_0 = 7.6489e-04
Loss = 4.6229e-01, PNorm = 48.1324, GNorm = 1.8670, lr_0 = 7.6437e-04
Loss = 5.3893e-01, PNorm = 48.1499, GNorm = 2.1356, lr_0 = 7.6385e-04
Loss = 4.0078e-01, PNorm = 48.1666, GNorm = 1.6052, lr_0 = 7.6332e-04
Loss = 4.1702e-01, PNorm = 48.1891, GNorm = 1.9750, lr_0 = 7.6280e-04
Loss = 4.1680e-01, PNorm = 48.2140, GNorm = 1.2407, lr_0 = 7.6228e-04
Loss = 4.3472e-01, PNorm = 48.2290, GNorm = 1.2909, lr_0 = 7.6176e-04
Loss = 5.1502e-01, PNorm = 48.2428, GNorm = 1.9582, lr_0 = 7.6123e-04
Loss = 4.5249e-01, PNorm = 48.2574, GNorm = 0.8623, lr_0 = 7.6071e-04
Loss = 3.7576e-01, PNorm = 48.2713, GNorm = 1.1255, lr_0 = 7.6019e-04
Loss = 4.5232e-01, PNorm = 48.2877, GNorm = 1.2980, lr_0 = 7.5967e-04
Loss = 4.2661e-01, PNorm = 48.3026, GNorm = 1.0769, lr_0 = 7.5915e-04
Loss = 4.7119e-01, PNorm = 48.3207, GNorm = 1.3678, lr_0 = 7.5863e-04
Loss = 4.9044e-01, PNorm = 48.3325, GNorm = 1.2723, lr_0 = 7.5811e-04
Loss = 4.8923e-01, PNorm = 48.3526, GNorm = 1.5662, lr_0 = 7.5759e-04
Loss = 4.6099e-01, PNorm = 48.3728, GNorm = 2.3013, lr_0 = 7.5707e-04
Loss = 5.5644e-01, PNorm = 48.3881, GNorm = 1.1948, lr_0 = 7.5655e-04
Loss = 4.7979e-01, PNorm = 48.4078, GNorm = 2.8566, lr_0 = 7.5603e-04
Loss = 5.0270e-01, PNorm = 48.4286, GNorm = 1.5148, lr_0 = 7.5552e-04
Loss = 4.4281e-01, PNorm = 48.4485, GNorm = 0.9519, lr_0 = 7.5500e-04
Loss = 5.0917e-01, PNorm = 48.4681, GNorm = 1.0010, lr_0 = 7.5448e-04
Loss = 4.2593e-01, PNorm = 48.4821, GNorm = 1.2801, lr_0 = 7.5397e-04
Loss = 4.9750e-01, PNorm = 48.4959, GNorm = 1.1316, lr_0 = 7.5345e-04
Loss = 4.8175e-01, PNorm = 48.5081, GNorm = 2.1944, lr_0 = 7.5293e-04
Loss = 4.9077e-01, PNorm = 48.5257, GNorm = 1.0901, lr_0 = 7.5242e-04
Loss = 4.8563e-01, PNorm = 48.5401, GNorm = 0.8386, lr_0 = 7.5190e-04
Loss = 4.1245e-01, PNorm = 48.5598, GNorm = 1.0994, lr_0 = 7.5139e-04
Loss = 5.8774e-01, PNorm = 48.5699, GNorm = 2.2834, lr_0 = 7.5087e-04
Loss = 5.2311e-01, PNorm = 48.5893, GNorm = 1.1905, lr_0 = 7.5036e-04
Loss = 5.2751e-01, PNorm = 48.6174, GNorm = 1.7181, lr_0 = 7.4984e-04
Loss = 4.5860e-01, PNorm = 48.6399, GNorm = 1.1692, lr_0 = 7.4933e-04
Loss = 4.7616e-01, PNorm = 48.6623, GNorm = 1.0041, lr_0 = 7.4882e-04
Loss = 4.2395e-01, PNorm = 48.6839, GNorm = 2.4996, lr_0 = 7.4830e-04
Loss = 4.2120e-01, PNorm = 48.7004, GNorm = 1.1321, lr_0 = 7.4779e-04
Loss = 4.5294e-01, PNorm = 48.7220, GNorm = 0.8706, lr_0 = 7.4728e-04
Loss = 3.9939e-01, PNorm = 48.7384, GNorm = 1.7068, lr_0 = 7.4677e-04
Loss = 4.4508e-01, PNorm = 48.7523, GNorm = 1.0006, lr_0 = 7.4625e-04
Loss = 5.0752e-01, PNorm = 48.7674, GNorm = 1.6997, lr_0 = 7.4574e-04
Loss = 4.9261e-01, PNorm = 48.7866, GNorm = 2.6602, lr_0 = 7.4523e-04
Loss = 4.7952e-01, PNorm = 48.8015, GNorm = 2.0091, lr_0 = 7.4472e-04
Loss = 4.8508e-01, PNorm = 48.8114, GNorm = 1.4965, lr_0 = 7.4421e-04
Loss = 4.3454e-01, PNorm = 48.8292, GNorm = 1.1871, lr_0 = 7.4370e-04
Loss = 5.1074e-01, PNorm = 48.8497, GNorm = 1.3729, lr_0 = 7.4319e-04
Loss = 4.6154e-01, PNorm = 48.8677, GNorm = 2.1024, lr_0 = 7.4268e-04
Loss = 4.7164e-01, PNorm = 48.8864, GNorm = 2.1640, lr_0 = 7.4217e-04
Loss = 4.2848e-01, PNorm = 48.9096, GNorm = 2.5467, lr_0 = 7.4167e-04
Loss = 5.0214e-01, PNorm = 48.9213, GNorm = 2.1378, lr_0 = 7.4116e-04
Loss = 5.0596e-01, PNorm = 48.9307, GNorm = 1.2605, lr_0 = 7.4065e-04
Loss = 4.4297e-01, PNorm = 48.9562, GNorm = 1.6366, lr_0 = 7.4014e-04
Loss = 4.8526e-01, PNorm = 48.9740, GNorm = 2.5853, lr_0 = 7.3964e-04
Loss = 5.8382e-01, PNorm = 48.9899, GNorm = 0.9961, lr_0 = 7.3913e-04
Loss = 5.1622e-01, PNorm = 49.0044, GNorm = 2.9728, lr_0 = 7.3862e-04
Loss = 4.8110e-01, PNorm = 49.0185, GNorm = 1.1622, lr_0 = 7.3812e-04
Loss = 5.0354e-01, PNorm = 49.0320, GNorm = 1.3374, lr_0 = 7.3761e-04
Loss = 5.2537e-01, PNorm = 49.0477, GNorm = 1.6754, lr_0 = 7.3711e-04
Loss = 4.2454e-01, PNorm = 49.0673, GNorm = 2.6407, lr_0 = 7.3660e-04
Loss = 4.3529e-01, PNorm = 49.0821, GNorm = 1.0986, lr_0 = 7.3610e-04
Loss = 4.8343e-01, PNorm = 49.1088, GNorm = 1.4010, lr_0 = 7.3559e-04
Loss = 4.6592e-01, PNorm = 49.1224, GNorm = 2.2615, lr_0 = 7.3509e-04
Loss = 4.8328e-01, PNorm = 49.1360, GNorm = 1.1499, lr_0 = 7.3458e-04
Loss = 5.3114e-01, PNorm = 49.1575, GNorm = 1.3802, lr_0 = 7.3408e-04
Loss = 4.1540e-01, PNorm = 49.1706, GNorm = 1.7085, lr_0 = 7.3358e-04
Loss = 4.3046e-01, PNorm = 49.1838, GNorm = 1.0589, lr_0 = 7.3308e-04
Loss = 4.2562e-01, PNorm = 49.1923, GNorm = 1.4498, lr_0 = 7.3257e-04
Loss = 4.3059e-01, PNorm = 49.2032, GNorm = 1.0782, lr_0 = 7.3207e-04
Loss = 5.3293e-01, PNorm = 49.2172, GNorm = 1.6507, lr_0 = 7.3157e-04
Loss = 4.6293e-01, PNorm = 49.2370, GNorm = 2.9716, lr_0 = 7.3107e-04
Loss = 5.4509e-01, PNorm = 49.2539, GNorm = 0.8865, lr_0 = 7.3057e-04
Loss = 4.8710e-01, PNorm = 49.2704, GNorm = 2.3934, lr_0 = 7.3007e-04
Loss = 4.5012e-01, PNorm = 49.2841, GNorm = 1.2196, lr_0 = 7.2957e-04
Loss = 5.2623e-01, PNorm = 49.3103, GNorm = 1.8354, lr_0 = 7.2907e-04
Loss = 4.8043e-01, PNorm = 49.3253, GNorm = 2.1750, lr_0 = 7.2857e-04
Loss = 4.2392e-01, PNorm = 49.3346, GNorm = 2.1226, lr_0 = 7.2807e-04
Loss = 4.4187e-01, PNorm = 49.3562, GNorm = 1.9807, lr_0 = 7.2757e-04
Loss = 4.3437e-01, PNorm = 49.3720, GNorm = 1.4715, lr_0 = 7.2707e-04
Loss = 4.4680e-01, PNorm = 49.3838, GNorm = 1.3215, lr_0 = 7.2657e-04
Loss = 4.6555e-01, PNorm = 49.4003, GNorm = 1.5381, lr_0 = 7.2608e-04
Loss = 4.3486e-01, PNorm = 49.4272, GNorm = 1.2289, lr_0 = 7.2558e-04
Loss = 4.7301e-01, PNorm = 49.4461, GNorm = 1.3786, lr_0 = 7.2508e-04
Loss = 4.6755e-01, PNorm = 49.4641, GNorm = 1.1350, lr_0 = 7.2458e-04
Loss = 4.9156e-01, PNorm = 49.4814, GNorm = 1.3316, lr_0 = 7.2409e-04
Loss = 4.8617e-01, PNorm = 49.4974, GNorm = 1.3043, lr_0 = 7.2359e-04
Loss = 5.0301e-01, PNorm = 49.5127, GNorm = 3.0383, lr_0 = 7.2310e-04
Loss = 4.2205e-01, PNorm = 49.5243, GNorm = 1.4343, lr_0 = 7.2260e-04
Loss = 4.6381e-01, PNorm = 49.5405, GNorm = 1.4605, lr_0 = 7.2211e-04
Loss = 4.4671e-01, PNorm = 49.5588, GNorm = 2.7925, lr_0 = 7.2161e-04
Loss = 5.1777e-01, PNorm = 49.5674, GNorm = 3.7573, lr_0 = 7.2112e-04
Loss = 4.4509e-01, PNorm = 49.5835, GNorm = 1.9084, lr_0 = 7.2062e-04
Loss = 4.7366e-01, PNorm = 49.6116, GNorm = 2.2228, lr_0 = 7.2013e-04
Loss = 5.0209e-01, PNorm = 49.6266, GNorm = 1.2802, lr_0 = 7.1964e-04
Validation mae = 0.121186
Epoch 6
Loss = 3.9520e-01, PNorm = 49.6465, GNorm = 1.0922, lr_0 = 7.1914e-04
Loss = 4.4529e-01, PNorm = 49.6599, GNorm = 2.0516, lr_0 = 7.1865e-04
Loss = 4.2939e-01, PNorm = 49.6785, GNorm = 2.4570, lr_0 = 7.1816e-04
Loss = 4.7672e-01, PNorm = 49.6916, GNorm = 1.7264, lr_0 = 7.1767e-04
Loss = 4.5243e-01, PNorm = 49.7114, GNorm = 2.1107, lr_0 = 7.1717e-04
Loss = 4.2177e-01, PNorm = 49.7224, GNorm = 1.4736, lr_0 = 7.1668e-04
Loss = 5.1250e-01, PNorm = 49.7385, GNorm = 1.9399, lr_0 = 7.1619e-04
Loss = 4.4491e-01, PNorm = 49.7480, GNorm = 1.9388, lr_0 = 7.1570e-04
Loss = 4.3930e-01, PNorm = 49.7713, GNorm = 1.8081, lr_0 = 7.1521e-04
Loss = 3.9541e-01, PNorm = 49.7799, GNorm = 1.8332, lr_0 = 7.1472e-04
Loss = 5.6662e-01, PNorm = 49.7984, GNorm = 1.9927, lr_0 = 7.1423e-04
Loss = 4.5049e-01, PNorm = 49.8277, GNorm = 1.9919, lr_0 = 7.1374e-04
Loss = 4.7241e-01, PNorm = 49.8472, GNorm = 2.1035, lr_0 = 7.1325e-04
Loss = 4.6873e-01, PNorm = 49.8610, GNorm = 1.2622, lr_0 = 7.1277e-04
Loss = 5.3743e-01, PNorm = 49.8785, GNorm = 1.4646, lr_0 = 7.1228e-04
Loss = 4.3835e-01, PNorm = 49.8989, GNorm = 1.1197, lr_0 = 7.1179e-04
Loss = 5.4516e-01, PNorm = 49.9148, GNorm = 2.0714, lr_0 = 7.1130e-04
Loss = 4.8948e-01, PNorm = 49.9270, GNorm = 1.9543, lr_0 = 7.1081e-04
Loss = 4.3813e-01, PNorm = 49.9389, GNorm = 1.3360, lr_0 = 7.1033e-04
Loss = 4.5712e-01, PNorm = 49.9535, GNorm = 2.0599, lr_0 = 7.0984e-04
Loss = 4.3247e-01, PNorm = 49.9659, GNorm = 0.9667, lr_0 = 7.0935e-04
Loss = 4.3341e-01, PNorm = 49.9811, GNorm = 1.1154, lr_0 = 7.0887e-04
Loss = 4.3126e-01, PNorm = 49.9961, GNorm = 1.6262, lr_0 = 7.0838e-04
Loss = 4.8803e-01, PNorm = 50.0065, GNorm = 0.8677, lr_0 = 7.0790e-04
Loss = 4.4397e-01, PNorm = 50.0265, GNorm = 1.9256, lr_0 = 7.0741e-04
Loss = 4.2556e-01, PNorm = 50.0478, GNorm = 1.4576, lr_0 = 7.0693e-04
Loss = 4.4334e-01, PNorm = 50.0571, GNorm = 1.8008, lr_0 = 7.0644e-04
Loss = 4.3095e-01, PNorm = 50.0678, GNorm = 1.4062, lr_0 = 7.0596e-04
Loss = 4.6288e-01, PNorm = 50.0894, GNorm = 1.5378, lr_0 = 7.0548e-04
Loss = 4.8624e-01, PNorm = 50.1097, GNorm = 0.9593, lr_0 = 7.0499e-04
Loss = 4.7451e-01, PNorm = 50.1210, GNorm = 1.7914, lr_0 = 7.0451e-04
Loss = 4.3171e-01, PNorm = 50.1283, GNorm = 1.7747, lr_0 = 7.0403e-04
Loss = 4.1207e-01, PNorm = 50.1394, GNorm = 1.8219, lr_0 = 7.0354e-04
Loss = 4.4087e-01, PNorm = 50.1500, GNorm = 1.2499, lr_0 = 7.0306e-04
Loss = 4.3943e-01, PNorm = 50.1686, GNorm = 1.0606, lr_0 = 7.0258e-04
Loss = 5.3261e-01, PNorm = 50.1800, GNorm = 2.9195, lr_0 = 7.0210e-04
Loss = 4.1545e-01, PNorm = 50.1937, GNorm = 1.3643, lr_0 = 7.0162e-04
Loss = 4.8155e-01, PNorm = 50.2083, GNorm = 1.3563, lr_0 = 7.0114e-04
Loss = 4.6714e-01, PNorm = 50.2228, GNorm = 1.5569, lr_0 = 7.0066e-04
Loss = 4.8759e-01, PNorm = 50.2399, GNorm = 1.2543, lr_0 = 7.0018e-04
Loss = 4.9533e-01, PNorm = 50.2544, GNorm = 2.0766, lr_0 = 6.9970e-04
Loss = 4.8850e-01, PNorm = 50.2711, GNorm = 1.5214, lr_0 = 6.9922e-04
Loss = 4.5417e-01, PNorm = 50.2847, GNorm = 1.3217, lr_0 = 6.9874e-04
Loss = 4.5901e-01, PNorm = 50.2893, GNorm = 2.0970, lr_0 = 6.9826e-04
Loss = 5.3180e-01, PNorm = 50.3108, GNorm = 1.2778, lr_0 = 6.9778e-04
Loss = 4.4478e-01, PNorm = 50.3309, GNorm = 2.2791, lr_0 = 6.9730e-04
Loss = 4.5762e-01, PNorm = 50.3569, GNorm = 1.9814, lr_0 = 6.9683e-04
Loss = 4.7087e-01, PNorm = 50.3843, GNorm = 2.3960, lr_0 = 6.9635e-04
Loss = 4.2338e-01, PNorm = 50.4071, GNorm = 0.7282, lr_0 = 6.9587e-04
Loss = 5.1403e-01, PNorm = 50.4210, GNorm = 1.2893, lr_0 = 6.9540e-04
Loss = 4.6215e-01, PNorm = 50.4373, GNorm = 2.2770, lr_0 = 6.9492e-04
Loss = 4.7093e-01, PNorm = 50.4594, GNorm = 1.2301, lr_0 = 6.9444e-04
Loss = 4.3063e-01, PNorm = 50.4790, GNorm = 1.3881, lr_0 = 6.9397e-04
Loss = 3.8963e-01, PNorm = 50.4981, GNorm = 1.5241, lr_0 = 6.9349e-04
Loss = 4.9406e-01, PNorm = 50.5092, GNorm = 1.4543, lr_0 = 6.9302e-04
Loss = 3.9704e-01, PNorm = 50.5163, GNorm = 1.4942, lr_0 = 6.9254e-04
Loss = 4.9677e-01, PNorm = 50.5334, GNorm = 2.3202, lr_0 = 6.9207e-04
Loss = 4.6475e-01, PNorm = 50.5427, GNorm = 1.0535, lr_0 = 6.9159e-04
Loss = 5.2861e-01, PNorm = 50.5565, GNorm = 0.9445, lr_0 = 6.9112e-04
Loss = 5.0668e-01, PNorm = 50.5779, GNorm = 1.1825, lr_0 = 6.9065e-04
Loss = 4.5730e-01, PNorm = 50.5915, GNorm = 1.5147, lr_0 = 6.9017e-04
Loss = 5.2454e-01, PNorm = 50.6129, GNorm = 1.2620, lr_0 = 6.8970e-04
Loss = 4.4900e-01, PNorm = 50.6287, GNorm = 1.0554, lr_0 = 6.8923e-04
Loss = 4.7062e-01, PNorm = 50.6458, GNorm = 1.2246, lr_0 = 6.8876e-04
Loss = 4.7031e-01, PNorm = 50.6564, GNorm = 1.4329, lr_0 = 6.8828e-04
Loss = 4.3101e-01, PNorm = 50.6797, GNorm = 0.9497, lr_0 = 6.8781e-04
Loss = 4.2850e-01, PNorm = 50.7026, GNorm = 1.2727, lr_0 = 6.8734e-04
Loss = 4.9137e-01, PNorm = 50.7110, GNorm = 2.5937, lr_0 = 6.8687e-04
Loss = 4.1045e-01, PNorm = 50.7253, GNorm = 1.7238, lr_0 = 6.8640e-04
Loss = 4.6853e-01, PNorm = 50.7453, GNorm = 1.7524, lr_0 = 6.8593e-04
Loss = 4.7425e-01, PNorm = 50.7619, GNorm = 2.7042, lr_0 = 6.8546e-04
Loss = 4.5809e-01, PNorm = 50.7727, GNorm = 1.0112, lr_0 = 6.8499e-04
Loss = 4.5947e-01, PNorm = 50.7920, GNorm = 2.1267, lr_0 = 6.8452e-04
Loss = 5.1639e-01, PNorm = 50.8085, GNorm = 1.2454, lr_0 = 6.8405e-04
Loss = 4.3361e-01, PNorm = 50.8249, GNorm = 1.1187, lr_0 = 6.8358e-04
Loss = 4.0908e-01, PNorm = 50.8366, GNorm = 1.3322, lr_0 = 6.8312e-04
Loss = 4.2343e-01, PNorm = 50.8480, GNorm = 0.9085, lr_0 = 6.8265e-04
Loss = 5.2040e-01, PNorm = 50.8610, GNorm = 2.0526, lr_0 = 6.8218e-04
Loss = 4.4685e-01, PNorm = 50.8747, GNorm = 1.9127, lr_0 = 6.8171e-04
Loss = 3.9951e-01, PNorm = 50.8920, GNorm = 1.2251, lr_0 = 6.8125e-04
Loss = 4.3390e-01, PNorm = 50.9062, GNorm = 1.3065, lr_0 = 6.8078e-04
Loss = 4.6577e-01, PNorm = 50.9193, GNorm = 1.3240, lr_0 = 6.8031e-04
Loss = 4.6419e-01, PNorm = 50.9383, GNorm = 1.3977, lr_0 = 6.7985e-04
Loss = 4.5947e-01, PNorm = 50.9624, GNorm = 1.1435, lr_0 = 6.7938e-04
Loss = 4.5194e-01, PNorm = 50.9863, GNorm = 2.5175, lr_0 = 6.7892e-04
Loss = 4.8935e-01, PNorm = 51.0035, GNorm = 2.3261, lr_0 = 6.7845e-04
Loss = 4.3939e-01, PNorm = 51.0189, GNorm = 1.8669, lr_0 = 6.7799e-04
Loss = 4.3031e-01, PNorm = 51.0321, GNorm = 1.1035, lr_0 = 6.7752e-04
Loss = 5.0934e-01, PNorm = 51.0400, GNorm = 2.2165, lr_0 = 6.7706e-04
Loss = 5.0828e-01, PNorm = 51.0493, GNorm = 2.0429, lr_0 = 6.7659e-04
Loss = 5.0012e-01, PNorm = 51.0644, GNorm = 2.1527, lr_0 = 6.7613e-04
Loss = 4.3088e-01, PNorm = 51.0790, GNorm = 1.0707, lr_0 = 6.7567e-04
Loss = 4.2971e-01, PNorm = 51.0966, GNorm = 1.1726, lr_0 = 6.7520e-04
Loss = 4.1022e-01, PNorm = 51.1115, GNorm = 1.7408, lr_0 = 6.7474e-04
Loss = 4.0842e-01, PNorm = 51.1179, GNorm = 0.9246, lr_0 = 6.7428e-04
Loss = 4.1318e-01, PNorm = 51.1264, GNorm = 1.0367, lr_0 = 6.7382e-04
Loss = 4.5213e-01, PNorm = 51.1383, GNorm = 1.4349, lr_0 = 6.7335e-04
Loss = 5.3456e-01, PNorm = 51.1560, GNorm = 2.8201, lr_0 = 6.7289e-04
Loss = 4.3934e-01, PNorm = 51.1615, GNorm = 1.3110, lr_0 = 6.7243e-04
Loss = 4.7786e-01, PNorm = 51.1735, GNorm = 1.1479, lr_0 = 6.7197e-04
Loss = 4.8418e-01, PNorm = 51.1927, GNorm = 1.2764, lr_0 = 6.7151e-04
Loss = 4.2227e-01, PNorm = 51.2089, GNorm = 1.1606, lr_0 = 6.7105e-04
Loss = 4.5495e-01, PNorm = 51.2204, GNorm = 1.6501, lr_0 = 6.7059e-04
Loss = 4.2674e-01, PNorm = 51.2292, GNorm = 1.0061, lr_0 = 6.7013e-04
Loss = 3.8936e-01, PNorm = 51.2401, GNorm = 1.1125, lr_0 = 6.6967e-04
Loss = 4.5338e-01, PNorm = 51.2467, GNorm = 2.9974, lr_0 = 6.6921e-04
Loss = 4.5969e-01, PNorm = 51.2592, GNorm = 0.8389, lr_0 = 6.6876e-04
Loss = 4.3963e-01, PNorm = 51.2690, GNorm = 1.6724, lr_0 = 6.6830e-04
Loss = 4.3680e-01, PNorm = 51.2922, GNorm = 1.9423, lr_0 = 6.6784e-04
Loss = 4.3028e-01, PNorm = 51.3105, GNorm = 1.2793, lr_0 = 6.6738e-04
Loss = 4.5534e-01, PNorm = 51.3278, GNorm = 2.3164, lr_0 = 6.6693e-04
Loss = 4.4355e-01, PNorm = 51.3378, GNorm = 1.5200, lr_0 = 6.6647e-04
Loss = 4.1172e-01, PNorm = 51.3485, GNorm = 1.5913, lr_0 = 6.6601e-04
Loss = 4.8570e-01, PNorm = 51.3603, GNorm = 1.2637, lr_0 = 6.6556e-04
Loss = 4.5840e-01, PNorm = 51.3784, GNorm = 1.2142, lr_0 = 6.6510e-04
Loss = 4.2238e-01, PNorm = 51.3956, GNorm = 1.6195, lr_0 = 6.6464e-04
Loss = 4.3758e-01, PNorm = 51.4018, GNorm = 1.1583, lr_0 = 6.6419e-04
Loss = 4.9216e-01, PNorm = 51.4127, GNorm = 1.0492, lr_0 = 6.6373e-04
Loss = 4.7011e-01, PNorm = 51.4234, GNorm = 1.6552, lr_0 = 6.6328e-04
Loss = 4.6557e-01, PNorm = 51.4367, GNorm = 2.1464, lr_0 = 6.6282e-04
Validation mae = 0.118961
Epoch 7
Loss = 4.9808e-01, PNorm = 51.4479, GNorm = 0.7440, lr_0 = 6.6237e-04
Loss = 4.1240e-01, PNorm = 51.4631, GNorm = 2.4385, lr_0 = 6.6192e-04
Loss = 4.7108e-01, PNorm = 51.4799, GNorm = 2.0976, lr_0 = 6.6146e-04
Loss = 4.5147e-01, PNorm = 51.4974, GNorm = 1.4861, lr_0 = 6.6101e-04
Loss = 4.7149e-01, PNorm = 51.5035, GNorm = 1.1320, lr_0 = 6.6056e-04
Loss = 4.5764e-01, PNorm = 51.5174, GNorm = 1.7687, lr_0 = 6.6011e-04
Loss = 4.4288e-01, PNorm = 51.5340, GNorm = 1.0492, lr_0 = 6.5965e-04
Loss = 4.6380e-01, PNorm = 51.5451, GNorm = 2.2865, lr_0 = 6.5920e-04
Loss = 4.8583e-01, PNorm = 51.5605, GNorm = 0.8436, lr_0 = 6.5875e-04
Loss = 4.4127e-01, PNorm = 51.5819, GNorm = 1.1345, lr_0 = 6.5830e-04
Loss = 4.6742e-01, PNorm = 51.5956, GNorm = 1.9174, lr_0 = 6.5785e-04
Loss = 4.1307e-01, PNorm = 51.6126, GNorm = 1.8078, lr_0 = 6.5740e-04
Loss = 4.2639e-01, PNorm = 51.6234, GNorm = 2.2082, lr_0 = 6.5695e-04
Loss = 5.0829e-01, PNorm = 51.6421, GNorm = 1.2322, lr_0 = 6.5650e-04
Loss = 4.8443e-01, PNorm = 51.6587, GNorm = 1.6110, lr_0 = 6.5605e-04
Loss = 4.3294e-01, PNorm = 51.6733, GNorm = 2.3846, lr_0 = 6.5560e-04
Loss = 4.0102e-01, PNorm = 51.6861, GNorm = 2.1217, lr_0 = 6.5515e-04
Loss = 4.4708e-01, PNorm = 51.7040, GNorm = 1.5471, lr_0 = 6.5470e-04
Loss = 4.2712e-01, PNorm = 51.7176, GNorm = 1.6955, lr_0 = 6.5425e-04
Loss = 4.3378e-01, PNorm = 51.7284, GNorm = 1.3266, lr_0 = 6.5380e-04
Loss = 4.1076e-01, PNorm = 51.7389, GNorm = 1.1021, lr_0 = 6.5335e-04
Loss = 4.6745e-01, PNorm = 51.7496, GNorm = 1.0513, lr_0 = 6.5291e-04
Loss = 4.2717e-01, PNorm = 51.7603, GNorm = 2.1456, lr_0 = 6.5246e-04
Loss = 4.0334e-01, PNorm = 51.7721, GNorm = 1.5767, lr_0 = 6.5201e-04
Loss = 4.4678e-01, PNorm = 51.7865, GNorm = 2.0778, lr_0 = 6.5157e-04
Loss = 4.4212e-01, PNorm = 51.8044, GNorm = 1.0866, lr_0 = 6.5112e-04
Loss = 4.5666e-01, PNorm = 51.8243, GNorm = 1.2924, lr_0 = 6.5067e-04
Loss = 4.4709e-01, PNorm = 51.8429, GNorm = 1.4745, lr_0 = 6.5023e-04
Loss = 4.4557e-01, PNorm = 51.8566, GNorm = 1.0697, lr_0 = 6.4978e-04
Loss = 4.7275e-01, PNorm = 51.8743, GNorm = 3.3749, lr_0 = 6.4934e-04
Loss = 4.9474e-01, PNorm = 51.8940, GNorm = 1.0391, lr_0 = 6.4889e-04
Loss = 4.6404e-01, PNorm = 51.9116, GNorm = 2.2841, lr_0 = 6.4845e-04
Loss = 4.1194e-01, PNorm = 51.9290, GNorm = 1.2973, lr_0 = 6.4800e-04
Loss = 4.2780e-01, PNorm = 51.9440, GNorm = 1.4883, lr_0 = 6.4756e-04
Loss = 4.0213e-01, PNorm = 51.9618, GNorm = 1.0213, lr_0 = 6.4712e-04
Loss = 4.0398e-01, PNorm = 51.9734, GNorm = 1.5101, lr_0 = 6.4667e-04
Loss = 4.3543e-01, PNorm = 51.9831, GNorm = 2.9779, lr_0 = 6.4623e-04
Loss = 4.4153e-01, PNorm = 51.9918, GNorm = 1.5537, lr_0 = 6.4579e-04
Loss = 5.2166e-01, PNorm = 52.0120, GNorm = 1.1417, lr_0 = 6.4534e-04
Loss = 4.6461e-01, PNorm = 52.0393, GNorm = 2.2599, lr_0 = 6.4490e-04
Loss = 4.3690e-01, PNorm = 52.0553, GNorm = 2.9773, lr_0 = 6.4446e-04
Loss = 4.2869e-01, PNorm = 52.0706, GNorm = 1.2993, lr_0 = 6.4402e-04
Loss = 5.2670e-01, PNorm = 52.0877, GNorm = 0.9454, lr_0 = 6.4358e-04
Loss = 4.2559e-01, PNorm = 52.1033, GNorm = 1.1569, lr_0 = 6.4314e-04
Loss = 4.8008e-01, PNorm = 52.1098, GNorm = 1.4647, lr_0 = 6.4270e-04
Loss = 4.5328e-01, PNorm = 52.1178, GNorm = 1.4494, lr_0 = 6.4226e-04
Loss = 4.4544e-01, PNorm = 52.1373, GNorm = 1.3978, lr_0 = 6.4182e-04
Loss = 4.0877e-01, PNorm = 52.1477, GNorm = 1.1480, lr_0 = 6.4138e-04
Loss = 4.5721e-01, PNorm = 52.1628, GNorm = 1.1599, lr_0 = 6.4094e-04
Loss = 4.8771e-01, PNorm = 52.1788, GNorm = 1.1299, lr_0 = 6.4050e-04
Loss = 3.8799e-01, PNorm = 52.1904, GNorm = 0.9294, lr_0 = 6.4006e-04
Loss = 4.3200e-01, PNorm = 52.2019, GNorm = 1.2542, lr_0 = 6.3962e-04
Loss = 4.9719e-01, PNorm = 52.2209, GNorm = 1.4143, lr_0 = 6.3918e-04
Loss = 4.6025e-01, PNorm = 52.2381, GNorm = 1.4706, lr_0 = 6.3874e-04
Loss = 4.7473e-01, PNorm = 52.2456, GNorm = 1.7064, lr_0 = 6.3831e-04
Loss = 4.4417e-01, PNorm = 52.2570, GNorm = 1.4915, lr_0 = 6.3787e-04
Loss = 4.3014e-01, PNorm = 52.2612, GNorm = 1.2579, lr_0 = 6.3743e-04
Loss = 4.3946e-01, PNorm = 52.2720, GNorm = 1.3561, lr_0 = 6.3700e-04
Loss = 4.0970e-01, PNorm = 52.2879, GNorm = 1.0256, lr_0 = 6.3656e-04
Loss = 4.1144e-01, PNorm = 52.2987, GNorm = 1.1984, lr_0 = 6.3612e-04
Loss = 4.7275e-01, PNorm = 52.3074, GNorm = 1.3389, lr_0 = 6.3569e-04
Loss = 4.7322e-01, PNorm = 52.3194, GNorm = 0.9809, lr_0 = 6.3525e-04
Loss = 4.4265e-01, PNorm = 52.3384, GNorm = 1.2986, lr_0 = 6.3482e-04
Loss = 4.3192e-01, PNorm = 52.3462, GNorm = 1.5732, lr_0 = 6.3438e-04
Loss = 4.4639e-01, PNorm = 52.3581, GNorm = 1.6661, lr_0 = 6.3395e-04
Loss = 4.3071e-01, PNorm = 52.3746, GNorm = 1.9991, lr_0 = 6.3351e-04
Loss = 4.3429e-01, PNorm = 52.3881, GNorm = 1.0772, lr_0 = 6.3308e-04
Loss = 4.8857e-01, PNorm = 52.4087, GNorm = 1.7632, lr_0 = 6.3265e-04
Loss = 4.4834e-01, PNorm = 52.4289, GNorm = 1.2860, lr_0 = 6.3221e-04
Loss = 4.1795e-01, PNorm = 52.4475, GNorm = 2.4072, lr_0 = 6.3178e-04
Loss = 4.5731e-01, PNorm = 52.4564, GNorm = 1.2707, lr_0 = 6.3135e-04
Loss = 4.3257e-01, PNorm = 52.4630, GNorm = 1.0178, lr_0 = 6.3091e-04
Loss = 4.1149e-01, PNorm = 52.4713, GNorm = 1.3759, lr_0 = 6.3048e-04
Loss = 4.8361e-01, PNorm = 52.4744, GNorm = 1.2863, lr_0 = 6.3005e-04
Loss = 4.8701e-01, PNorm = 52.4872, GNorm = 1.0071, lr_0 = 6.2962e-04
Loss = 4.6389e-01, PNorm = 52.4999, GNorm = 2.6293, lr_0 = 6.2919e-04
Loss = 4.4897e-01, PNorm = 52.5094, GNorm = 1.0729, lr_0 = 6.2876e-04
Loss = 4.4828e-01, PNorm = 52.5238, GNorm = 1.2832, lr_0 = 6.2833e-04
Loss = 3.8548e-01, PNorm = 52.5337, GNorm = 1.2921, lr_0 = 6.2789e-04
Loss = 4.6351e-01, PNorm = 52.5475, GNorm = 1.0146, lr_0 = 6.2746e-04
Loss = 4.1551e-01, PNorm = 52.5589, GNorm = 1.2215, lr_0 = 6.2703e-04
Loss = 5.0319e-01, PNorm = 52.5757, GNorm = 1.7846, lr_0 = 6.2661e-04
Loss = 3.2937e-01, PNorm = 52.5891, GNorm = 0.6674, lr_0 = 6.2618e-04
Loss = 4.5924e-01, PNorm = 52.6049, GNorm = 1.0943, lr_0 = 6.2575e-04
Loss = 4.8744e-01, PNorm = 52.6245, GNorm = 1.3645, lr_0 = 6.2532e-04
Loss = 4.2906e-01, PNorm = 52.6405, GNorm = 1.7704, lr_0 = 6.2489e-04
Loss = 3.9814e-01, PNorm = 52.6538, GNorm = 1.6626, lr_0 = 6.2446e-04
Loss = 4.2974e-01, PNorm = 52.6736, GNorm = 1.4333, lr_0 = 6.2403e-04
Loss = 4.6551e-01, PNorm = 52.6778, GNorm = 1.4689, lr_0 = 6.2361e-04
Loss = 4.3161e-01, PNorm = 52.6887, GNorm = 1.7752, lr_0 = 6.2318e-04
Loss = 4.8594e-01, PNorm = 52.7018, GNorm = 1.4724, lr_0 = 6.2275e-04
Loss = 4.4122e-01, PNorm = 52.7159, GNorm = 0.9757, lr_0 = 6.2233e-04
Loss = 4.7223e-01, PNorm = 52.7330, GNorm = 1.3197, lr_0 = 6.2190e-04
Loss = 4.5643e-01, PNorm = 52.7454, GNorm = 3.9985, lr_0 = 6.2147e-04
Loss = 4.3536e-01, PNorm = 52.7692, GNorm = 0.8910, lr_0 = 6.2105e-04
Loss = 4.1620e-01, PNorm = 52.7852, GNorm = 1.7008, lr_0 = 6.2062e-04
Loss = 4.4611e-01, PNorm = 52.7981, GNorm = 1.2211, lr_0 = 6.2020e-04
Loss = 3.8343e-01, PNorm = 52.8077, GNorm = 1.1364, lr_0 = 6.1977e-04
Loss = 4.5850e-01, PNorm = 52.8203, GNorm = 1.2175, lr_0 = 6.1935e-04
Loss = 4.2484e-01, PNorm = 52.8259, GNorm = 2.0131, lr_0 = 6.1892e-04
Loss = 4.7594e-01, PNorm = 52.8360, GNorm = 1.3989, lr_0 = 6.1850e-04
Loss = 4.5984e-01, PNorm = 52.8420, GNorm = 1.6384, lr_0 = 6.1808e-04
Loss = 4.3518e-01, PNorm = 52.8549, GNorm = 1.1962, lr_0 = 6.1765e-04
Loss = 4.1887e-01, PNorm = 52.8672, GNorm = 1.2883, lr_0 = 6.1723e-04
Loss = 4.1462e-01, PNorm = 52.8756, GNorm = 1.7772, lr_0 = 6.1681e-04
Loss = 4.3402e-01, PNorm = 52.8781, GNorm = 1.3380, lr_0 = 6.1638e-04
Loss = 3.9954e-01, PNorm = 52.8887, GNorm = 0.9597, lr_0 = 6.1596e-04
Loss = 4.5366e-01, PNorm = 52.9058, GNorm = 1.1663, lr_0 = 6.1554e-04
Loss = 4.6406e-01, PNorm = 52.9216, GNorm = 1.4616, lr_0 = 6.1512e-04
Loss = 4.2171e-01, PNorm = 52.9297, GNorm = 1.3567, lr_0 = 6.1470e-04
Loss = 5.1835e-01, PNorm = 52.9378, GNorm = 1.5655, lr_0 = 6.1428e-04
Loss = 4.0169e-01, PNorm = 52.9502, GNorm = 1.1044, lr_0 = 6.1385e-04
Loss = 5.1456e-01, PNorm = 52.9584, GNorm = 2.5579, lr_0 = 6.1343e-04
Loss = 4.5902e-01, PNorm = 52.9772, GNorm = 2.3106, lr_0 = 6.1301e-04
Loss = 4.2419e-01, PNorm = 52.9914, GNorm = 1.2461, lr_0 = 6.1259e-04
Loss = 4.1121e-01, PNorm = 53.0061, GNorm = 1.3031, lr_0 = 6.1217e-04
Loss = 4.3224e-01, PNorm = 53.0128, GNorm = 1.7255, lr_0 = 6.1175e-04
Loss = 4.1795e-01, PNorm = 53.0163, GNorm = 1.5536, lr_0 = 6.1134e-04
Loss = 4.5858e-01, PNorm = 53.0243, GNorm = 1.6707, lr_0 = 6.1092e-04
Loss = 4.2172e-01, PNorm = 53.0395, GNorm = 1.8105, lr_0 = 6.1050e-04
Validation mae = 0.116833
Epoch 8
Loss = 3.9970e-01, PNorm = 53.0509, GNorm = 1.5592, lr_0 = 6.1008e-04
Loss = 4.3989e-01, PNorm = 53.0617, GNorm = 1.2156, lr_0 = 6.0966e-04
Loss = 4.7197e-01, PNorm = 53.0663, GNorm = 1.3200, lr_0 = 6.0924e-04
Loss = 4.0728e-01, PNorm = 53.0836, GNorm = 2.0364, lr_0 = 6.0883e-04
Loss = 4.1279e-01, PNorm = 53.0956, GNorm = 1.4946, lr_0 = 6.0841e-04
Loss = 4.1753e-01, PNorm = 53.1084, GNorm = 1.2034, lr_0 = 6.0799e-04
Loss = 4.1525e-01, PNorm = 53.1277, GNorm = 1.1706, lr_0 = 6.0758e-04
Loss = 4.8292e-01, PNorm = 53.1355, GNorm = 1.3763, lr_0 = 6.0716e-04
Loss = 4.2138e-01, PNorm = 53.1454, GNorm = 2.1306, lr_0 = 6.0674e-04
Loss = 4.1000e-01, PNorm = 53.1603, GNorm = 1.0483, lr_0 = 6.0633e-04
Loss = 4.5855e-01, PNorm = 53.1774, GNorm = 1.4320, lr_0 = 6.0591e-04
Loss = 4.0461e-01, PNorm = 53.1983, GNorm = 1.7637, lr_0 = 6.0550e-04
Loss = 4.4691e-01, PNorm = 53.2120, GNorm = 1.1000, lr_0 = 6.0508e-04
Loss = 4.3839e-01, PNorm = 53.2292, GNorm = 1.1106, lr_0 = 6.0467e-04
Loss = 4.3803e-01, PNorm = 53.2363, GNorm = 1.3523, lr_0 = 6.0425e-04
Loss = 4.1917e-01, PNorm = 53.2489, GNorm = 1.3602, lr_0 = 6.0384e-04
Loss = 4.1341e-01, PNorm = 53.2581, GNorm = 1.2684, lr_0 = 6.0343e-04
Loss = 4.0547e-01, PNorm = 53.2666, GNorm = 1.2735, lr_0 = 6.0301e-04
Loss = 4.2118e-01, PNorm = 53.2760, GNorm = 0.9680, lr_0 = 6.0260e-04
Loss = 3.9006e-01, PNorm = 53.2923, GNorm = 1.3168, lr_0 = 6.0219e-04
Loss = 4.0076e-01, PNorm = 53.3019, GNorm = 1.4520, lr_0 = 6.0178e-04
Loss = 4.3145e-01, PNorm = 53.3131, GNorm = 1.1954, lr_0 = 6.0136e-04
Loss = 4.7243e-01, PNorm = 53.3258, GNorm = 1.1363, lr_0 = 6.0095e-04
Loss = 4.0067e-01, PNorm = 53.3384, GNorm = 1.2421, lr_0 = 6.0054e-04
Loss = 3.6425e-01, PNorm = 53.3452, GNorm = 0.9111, lr_0 = 6.0013e-04
Loss = 4.2276e-01, PNorm = 53.3548, GNorm = 1.5670, lr_0 = 5.9972e-04
Loss = 4.1600e-01, PNorm = 53.3693, GNorm = 1.0357, lr_0 = 5.9931e-04
Loss = 4.5297e-01, PNorm = 53.3760, GNorm = 1.1244, lr_0 = 5.9890e-04
Loss = 5.0283e-01, PNorm = 53.3897, GNorm = 3.0039, lr_0 = 5.9849e-04
Loss = 4.3215e-01, PNorm = 53.4011, GNorm = 0.9795, lr_0 = 5.9808e-04
Loss = 4.3690e-01, PNorm = 53.4137, GNorm = 1.1285, lr_0 = 5.9767e-04
Loss = 4.4680e-01, PNorm = 53.4283, GNorm = 1.5569, lr_0 = 5.9726e-04
Loss = 4.7167e-01, PNorm = 53.4381, GNorm = 2.0812, lr_0 = 5.9685e-04
Loss = 3.8874e-01, PNorm = 53.4565, GNorm = 1.3706, lr_0 = 5.9644e-04
Loss = 4.5990e-01, PNorm = 53.4660, GNorm = 1.4503, lr_0 = 5.9603e-04
Loss = 3.5561e-01, PNorm = 53.4759, GNorm = 1.4648, lr_0 = 5.9562e-04
Loss = 4.6017e-01, PNorm = 53.4888, GNorm = 1.2026, lr_0 = 5.9521e-04
Loss = 4.1204e-01, PNorm = 53.5008, GNorm = 1.2740, lr_0 = 5.9481e-04
Loss = 5.3156e-01, PNorm = 53.5087, GNorm = 1.2509, lr_0 = 5.9440e-04
Loss = 4.2254e-01, PNorm = 53.5242, GNorm = 1.0956, lr_0 = 5.9399e-04
Loss = 4.9558e-01, PNorm = 53.5302, GNorm = 1.2251, lr_0 = 5.9358e-04
Loss = 4.9913e-01, PNorm = 53.5450, GNorm = 1.3709, lr_0 = 5.9318e-04
Loss = 4.2764e-01, PNorm = 53.5622, GNorm = 1.4106, lr_0 = 5.9277e-04
Loss = 4.1336e-01, PNorm = 53.5713, GNorm = 1.5369, lr_0 = 5.9236e-04
Loss = 4.4187e-01, PNorm = 53.5821, GNorm = 1.7463, lr_0 = 5.9196e-04
Loss = 3.8219e-01, PNorm = 53.5941, GNorm = 1.6260, lr_0 = 5.9155e-04
Loss = 4.0381e-01, PNorm = 53.6069, GNorm = 1.0867, lr_0 = 5.9115e-04
Loss = 4.0473e-01, PNorm = 53.6245, GNorm = 1.1918, lr_0 = 5.9074e-04
Loss = 4.4796e-01, PNorm = 53.6407, GNorm = 1.6014, lr_0 = 5.9034e-04
Loss = 4.8415e-01, PNorm = 53.6501, GNorm = 2.3201, lr_0 = 5.8993e-04
Loss = 4.2189e-01, PNorm = 53.6637, GNorm = 1.6481, lr_0 = 5.8953e-04
Loss = 4.2417e-01, PNorm = 53.6802, GNorm = 1.4027, lr_0 = 5.8913e-04
Loss = 4.8144e-01, PNorm = 53.6964, GNorm = 1.7069, lr_0 = 5.8872e-04
Loss = 4.8516e-01, PNorm = 53.7105, GNorm = 2.0509, lr_0 = 5.8832e-04
Loss = 4.2103e-01, PNorm = 53.7199, GNorm = 1.3976, lr_0 = 5.8792e-04
Loss = 4.6326e-01, PNorm = 53.7343, GNorm = 1.7981, lr_0 = 5.8751e-04
Loss = 4.4008e-01, PNorm = 53.7433, GNorm = 1.6966, lr_0 = 5.8711e-04
Loss = 4.7390e-01, PNorm = 53.7603, GNorm = 1.2750, lr_0 = 5.8671e-04
Loss = 4.4945e-01, PNorm = 53.7658, GNorm = 1.1353, lr_0 = 5.8631e-04
Loss = 4.4831e-01, PNorm = 53.7721, GNorm = 1.1777, lr_0 = 5.8591e-04
Loss = 4.1719e-01, PNorm = 53.7768, GNorm = 1.7266, lr_0 = 5.8550e-04
Loss = 4.7075e-01, PNorm = 53.7878, GNorm = 1.5619, lr_0 = 5.8510e-04
Loss = 4.4779e-01, PNorm = 53.8030, GNorm = 1.1861, lr_0 = 5.8470e-04
Loss = 4.0170e-01, PNorm = 53.8141, GNorm = 2.6603, lr_0 = 5.8430e-04
Loss = 4.9744e-01, PNorm = 53.8280, GNorm = 2.2723, lr_0 = 5.8390e-04
Loss = 4.9156e-01, PNorm = 53.8489, GNorm = 0.8755, lr_0 = 5.8350e-04
Loss = 3.8549e-01, PNorm = 53.8621, GNorm = 1.0112, lr_0 = 5.8310e-04
Loss = 4.7075e-01, PNorm = 53.8734, GNorm = 1.2991, lr_0 = 5.8270e-04
Loss = 4.5446e-01, PNorm = 53.8855, GNorm = 1.8279, lr_0 = 5.8230e-04
Loss = 4.6827e-01, PNorm = 53.8924, GNorm = 1.4376, lr_0 = 5.8190e-04
Loss = 3.9830e-01, PNorm = 53.9022, GNorm = 1.5446, lr_0 = 5.8151e-04
Loss = 4.6623e-01, PNorm = 53.9094, GNorm = 1.1474, lr_0 = 5.8111e-04
Loss = 4.1385e-01, PNorm = 53.9231, GNorm = 0.9283, lr_0 = 5.8071e-04
Loss = 4.6589e-01, PNorm = 53.9389, GNorm = 1.6462, lr_0 = 5.8031e-04
Loss = 4.3627e-01, PNorm = 53.9534, GNorm = 1.4327, lr_0 = 5.7991e-04
Loss = 4.6621e-01, PNorm = 53.9696, GNorm = 1.3141, lr_0 = 5.7952e-04
Loss = 4.1843e-01, PNorm = 53.9861, GNorm = 1.2312, lr_0 = 5.7912e-04
Loss = 4.2061e-01, PNorm = 53.9940, GNorm = 1.2945, lr_0 = 5.7872e-04
Loss = 4.0860e-01, PNorm = 54.0055, GNorm = 1.0088, lr_0 = 5.7833e-04
Loss = 4.0736e-01, PNorm = 54.0179, GNorm = 1.0917, lr_0 = 5.7793e-04
Loss = 3.8354e-01, PNorm = 54.0278, GNorm = 1.8161, lr_0 = 5.7753e-04
Loss = 4.4719e-01, PNorm = 54.0299, GNorm = 1.0484, lr_0 = 5.7714e-04
Loss = 4.4678e-01, PNorm = 54.0428, GNorm = 1.8227, lr_0 = 5.7674e-04
Loss = 4.3637e-01, PNorm = 54.0560, GNorm = 1.1383, lr_0 = 5.7635e-04
Loss = 4.9222e-01, PNorm = 54.0657, GNorm = 1.5308, lr_0 = 5.7595e-04
Loss = 3.9980e-01, PNorm = 54.0761, GNorm = 1.1191, lr_0 = 5.7556e-04
Loss = 3.8790e-01, PNorm = 54.0793, GNorm = 1.3689, lr_0 = 5.7516e-04
Loss = 3.8627e-01, PNorm = 54.0872, GNorm = 0.8939, lr_0 = 5.7477e-04
Loss = 4.6844e-01, PNorm = 54.0981, GNorm = 1.4477, lr_0 = 5.7438e-04
Loss = 4.3000e-01, PNorm = 54.1078, GNorm = 1.7582, lr_0 = 5.7398e-04
Loss = 4.1384e-01, PNorm = 54.1165, GNorm = 1.3539, lr_0 = 5.7359e-04
Loss = 4.5962e-01, PNorm = 54.1328, GNorm = 1.1621, lr_0 = 5.7320e-04
Loss = 3.6874e-01, PNorm = 54.1483, GNorm = 1.3616, lr_0 = 5.7280e-04
Loss = 5.0302e-01, PNorm = 54.1555, GNorm = 1.3293, lr_0 = 5.7241e-04
Loss = 4.9882e-01, PNorm = 54.1667, GNorm = 1.6289, lr_0 = 5.7202e-04
Loss = 4.1439e-01, PNorm = 54.1800, GNorm = 1.0568, lr_0 = 5.7163e-04
Loss = 4.4863e-01, PNorm = 54.1887, GNorm = 1.6055, lr_0 = 5.7124e-04
Loss = 3.9690e-01, PNorm = 54.2047, GNorm = 1.0054, lr_0 = 5.7084e-04
Loss = 4.5107e-01, PNorm = 54.2119, GNorm = 1.0933, lr_0 = 5.7045e-04
Loss = 4.1599e-01, PNorm = 54.2238, GNorm = 1.3867, lr_0 = 5.7006e-04
Loss = 4.0832e-01, PNorm = 54.2349, GNorm = 1.2655, lr_0 = 5.6967e-04
Loss = 4.0543e-01, PNorm = 54.2425, GNorm = 0.9547, lr_0 = 5.6928e-04
Loss = 3.8566e-01, PNorm = 54.2516, GNorm = 1.0945, lr_0 = 5.6889e-04
Loss = 4.6952e-01, PNorm = 54.2631, GNorm = 1.4362, lr_0 = 5.6850e-04
Loss = 4.8218e-01, PNorm = 54.2756, GNorm = 1.2251, lr_0 = 5.6811e-04
Loss = 4.7563e-01, PNorm = 54.2884, GNorm = 1.3179, lr_0 = 5.6772e-04
Loss = 4.6950e-01, PNorm = 54.3064, GNorm = 1.4373, lr_0 = 5.6733e-04
Loss = 4.2428e-01, PNorm = 54.3170, GNorm = 2.0663, lr_0 = 5.6695e-04
Loss = 4.1341e-01, PNorm = 54.3253, GNorm = 1.2467, lr_0 = 5.6656e-04
Loss = 4.3556e-01, PNorm = 54.3374, GNorm = 1.2235, lr_0 = 5.6617e-04
Loss = 5.0784e-01, PNorm = 54.3459, GNorm = 1.5250, lr_0 = 5.6578e-04
Loss = 4.4335e-01, PNorm = 54.3637, GNorm = 1.5731, lr_0 = 5.6539e-04
Loss = 4.6789e-01, PNorm = 54.3723, GNorm = 1.8694, lr_0 = 5.6501e-04
Loss = 3.7165e-01, PNorm = 54.3768, GNorm = 1.2250, lr_0 = 5.6462e-04
Loss = 3.7093e-01, PNorm = 54.3882, GNorm = 1.8512, lr_0 = 5.6423e-04
Loss = 4.6063e-01, PNorm = 54.3994, GNorm = 1.1788, lr_0 = 5.6385e-04
Loss = 4.0320e-01, PNorm = 54.4055, GNorm = 1.4398, lr_0 = 5.6346e-04
Loss = 4.3468e-01, PNorm = 54.4129, GNorm = 1.6079, lr_0 = 5.6307e-04
Loss = 3.9628e-01, PNorm = 54.4205, GNorm = 1.6819, lr_0 = 5.6269e-04
Loss = 4.6487e-01, PNorm = 54.4312, GNorm = 2.1682, lr_0 = 5.6230e-04
Validation mae = 0.117031
Epoch 9
Loss = 4.1768e-01, PNorm = 54.4420, GNorm = 1.4832, lr_0 = 5.6192e-04
Loss = 4.1796e-01, PNorm = 54.4495, GNorm = 1.3208, lr_0 = 5.6153e-04
Loss = 4.8429e-01, PNorm = 54.4688, GNorm = 1.1515, lr_0 = 5.6115e-04
Loss = 4.8283e-01, PNorm = 54.4811, GNorm = 2.1148, lr_0 = 5.6076e-04
Loss = 4.1049e-01, PNorm = 54.4944, GNorm = 1.5501, lr_0 = 5.6038e-04
Loss = 3.9884e-01, PNorm = 54.5046, GNorm = 1.4054, lr_0 = 5.6000e-04
Loss = 3.8981e-01, PNorm = 54.5113, GNorm = 1.0599, lr_0 = 5.5961e-04
Loss = 4.6797e-01, PNorm = 54.5178, GNorm = 1.5006, lr_0 = 5.5923e-04
Loss = 3.9116e-01, PNorm = 54.5251, GNorm = 1.1931, lr_0 = 5.5885e-04
Loss = 4.2147e-01, PNorm = 54.5413, GNorm = 1.1688, lr_0 = 5.5846e-04
Loss = 4.6487e-01, PNorm = 54.5577, GNorm = 1.8121, lr_0 = 5.5808e-04
Loss = 3.9960e-01, PNorm = 54.5734, GNorm = 1.2011, lr_0 = 5.5770e-04
Loss = 4.1329e-01, PNorm = 54.5862, GNorm = 1.0945, lr_0 = 5.5732e-04
Loss = 4.1456e-01, PNorm = 54.6007, GNorm = 1.7152, lr_0 = 5.5693e-04
Loss = 4.0472e-01, PNorm = 54.6079, GNorm = 1.2168, lr_0 = 5.5655e-04
Loss = 5.2217e-01, PNorm = 54.6196, GNorm = 1.3353, lr_0 = 5.5617e-04
Loss = 3.9622e-01, PNorm = 54.6278, GNorm = 1.2082, lr_0 = 5.5579e-04
Loss = 5.3155e-01, PNorm = 54.6332, GNorm = 1.4466, lr_0 = 5.5541e-04
Loss = 4.2980e-01, PNorm = 54.6384, GNorm = 2.2065, lr_0 = 5.5503e-04
Loss = 4.4972e-01, PNorm = 54.6471, GNorm = 1.0920, lr_0 = 5.5465e-04
Loss = 4.3544e-01, PNorm = 54.6588, GNorm = 1.2968, lr_0 = 5.5427e-04
Loss = 4.4918e-01, PNorm = 54.6701, GNorm = 1.4067, lr_0 = 5.5389e-04
Loss = 3.9573e-01, PNorm = 54.6848, GNorm = 1.1704, lr_0 = 5.5351e-04
Loss = 4.3801e-01, PNorm = 54.6931, GNorm = 1.3109, lr_0 = 5.5313e-04
Loss = 4.3546e-01, PNorm = 54.7045, GNorm = 1.4826, lr_0 = 5.5275e-04
Loss = 4.4221e-01, PNorm = 54.7143, GNorm = 1.0612, lr_0 = 5.5237e-04
Loss = 3.9968e-01, PNorm = 54.7189, GNorm = 2.5663, lr_0 = 5.5199e-04
Loss = 4.5006e-01, PNorm = 54.7348, GNorm = 1.5509, lr_0 = 5.5162e-04
Loss = 4.3282e-01, PNorm = 54.7501, GNorm = 1.5212, lr_0 = 5.5124e-04
Loss = 4.3919e-01, PNorm = 54.7644, GNorm = 1.0965, lr_0 = 5.5086e-04
Loss = 4.0551e-01, PNorm = 54.7752, GNorm = 1.6288, lr_0 = 5.5048e-04
Loss = 4.2639e-01, PNorm = 54.7884, GNorm = 1.8994, lr_0 = 5.5011e-04
Loss = 4.5886e-01, PNorm = 54.7960, GNorm = 1.4193, lr_0 = 5.4973e-04
Loss = 4.3186e-01, PNorm = 54.8099, GNorm = 1.6257, lr_0 = 5.4935e-04
Loss = 4.8313e-01, PNorm = 54.8223, GNorm = 1.4842, lr_0 = 5.4898e-04
Loss = 3.4891e-01, PNorm = 54.8381, GNorm = 0.9935, lr_0 = 5.4860e-04
Loss = 3.5650e-01, PNorm = 54.8490, GNorm = 1.0393, lr_0 = 5.4822e-04
Loss = 4.2576e-01, PNorm = 54.8653, GNorm = 1.7383, lr_0 = 5.4785e-04
Loss = 4.4433e-01, PNorm = 54.8738, GNorm = 2.4947, lr_0 = 5.4747e-04
Loss = 4.0351e-01, PNorm = 54.8853, GNorm = 1.5556, lr_0 = 5.4710e-04
Loss = 4.0662e-01, PNorm = 54.8979, GNorm = 1.4586, lr_0 = 5.4672e-04
Loss = 4.5453e-01, PNorm = 54.9127, GNorm = 1.8132, lr_0 = 5.4635e-04
Loss = 4.0188e-01, PNorm = 54.9271, GNorm = 0.9845, lr_0 = 5.4597e-04
Loss = 4.4416e-01, PNorm = 54.9345, GNorm = 1.2564, lr_0 = 5.4560e-04
Loss = 3.9831e-01, PNorm = 54.9458, GNorm = 1.0984, lr_0 = 5.4523e-04
Loss = 4.2533e-01, PNorm = 54.9547, GNorm = 1.3828, lr_0 = 5.4485e-04
Loss = 4.6426e-01, PNorm = 54.9661, GNorm = 1.8006, lr_0 = 5.4448e-04
Loss = 3.8853e-01, PNorm = 54.9751, GNorm = 1.7815, lr_0 = 5.4411e-04
Loss = 4.3644e-01, PNorm = 54.9811, GNorm = 1.4254, lr_0 = 5.4373e-04
Loss = 4.3281e-01, PNorm = 54.9969, GNorm = 0.8252, lr_0 = 5.4336e-04
Loss = 4.4648e-01, PNorm = 55.0121, GNorm = 1.5303, lr_0 = 5.4299e-04
Loss = 3.7087e-01, PNorm = 55.0313, GNorm = 0.8778, lr_0 = 5.4262e-04
Loss = 4.2984e-01, PNorm = 55.0393, GNorm = 1.5689, lr_0 = 5.4225e-04
Loss = 4.4860e-01, PNorm = 55.0493, GNorm = 1.3256, lr_0 = 5.4187e-04
Loss = 4.1872e-01, PNorm = 55.0667, GNorm = 1.4950, lr_0 = 5.4150e-04
Loss = 4.1466e-01, PNorm = 55.0781, GNorm = 1.2988, lr_0 = 5.4113e-04
Loss = 4.2872e-01, PNorm = 55.0900, GNorm = 1.1895, lr_0 = 5.4076e-04
Loss = 4.2884e-01, PNorm = 55.1033, GNorm = 1.2915, lr_0 = 5.4039e-04
Loss = 4.2448e-01, PNorm = 55.1132, GNorm = 1.6419, lr_0 = 5.4002e-04
Loss = 4.5079e-01, PNorm = 55.1275, GNorm = 1.5142, lr_0 = 5.3965e-04
Loss = 3.6665e-01, PNorm = 55.1436, GNorm = 1.9014, lr_0 = 5.3928e-04
Loss = 4.9442e-01, PNorm = 55.1507, GNorm = 2.6144, lr_0 = 5.3891e-04
Loss = 4.0161e-01, PNorm = 55.1631, GNorm = 1.2523, lr_0 = 5.3854e-04
Loss = 4.4366e-01, PNorm = 55.1722, GNorm = 1.2777, lr_0 = 5.3817e-04
Loss = 4.2072e-01, PNorm = 55.1861, GNorm = 1.7342, lr_0 = 5.3781e-04
Loss = 4.3527e-01, PNorm = 55.1958, GNorm = 1.2865, lr_0 = 5.3744e-04
Loss = 4.4373e-01, PNorm = 55.2059, GNorm = 1.9340, lr_0 = 5.3707e-04
Loss = 4.5404e-01, PNorm = 55.2213, GNorm = 1.3243, lr_0 = 5.3670e-04
Loss = 4.5172e-01, PNorm = 55.2351, GNorm = 1.2895, lr_0 = 5.3633e-04
Loss = 3.8642e-01, PNorm = 55.2455, GNorm = 1.4953, lr_0 = 5.3597e-04
Loss = 3.4980e-01, PNorm = 55.2530, GNorm = 1.3438, lr_0 = 5.3560e-04
Loss = 4.3264e-01, PNorm = 55.2579, GNorm = 1.7303, lr_0 = 5.3523e-04
Loss = 4.4472e-01, PNorm = 55.2657, GNorm = 0.9428, lr_0 = 5.3486e-04
Loss = 4.4732e-01, PNorm = 55.2843, GNorm = 1.2224, lr_0 = 5.3450e-04
Loss = 4.6784e-01, PNorm = 55.2921, GNorm = 1.7507, lr_0 = 5.3413e-04
Loss = 4.5074e-01, PNorm = 55.3054, GNorm = 1.4411, lr_0 = 5.3377e-04
Loss = 4.0522e-01, PNorm = 55.3215, GNorm = 1.1941, lr_0 = 5.3340e-04
Loss = 4.4446e-01, PNorm = 55.3283, GNorm = 1.6656, lr_0 = 5.3304e-04
Loss = 4.4418e-01, PNorm = 55.3374, GNorm = 1.1439, lr_0 = 5.3267e-04
Loss = 4.5963e-01, PNorm = 55.3479, GNorm = 1.3192, lr_0 = 5.3231e-04
Loss = 4.3451e-01, PNorm = 55.3552, GNorm = 1.1951, lr_0 = 5.3194e-04
Loss = 3.8807e-01, PNorm = 55.3642, GNorm = 1.3525, lr_0 = 5.3158e-04
Loss = 3.8007e-01, PNorm = 55.3729, GNorm = 1.1513, lr_0 = 5.3121e-04
Loss = 3.7142e-01, PNorm = 55.3806, GNorm = 1.0385, lr_0 = 5.3085e-04
Loss = 4.5369e-01, PNorm = 55.3862, GNorm = 1.0121, lr_0 = 5.3048e-04
Loss = 3.9985e-01, PNorm = 55.3987, GNorm = 1.3840, lr_0 = 5.3012e-04
Loss = 4.2394e-01, PNorm = 55.4038, GNorm = 1.6384, lr_0 = 5.2976e-04
Loss = 4.2011e-01, PNorm = 55.4194, GNorm = 0.9658, lr_0 = 5.2939e-04
Loss = 3.8265e-01, PNorm = 55.4313, GNorm = 1.1964, lr_0 = 5.2903e-04
Loss = 3.9395e-01, PNorm = 55.4441, GNorm = 1.1450, lr_0 = 5.2867e-04
Loss = 4.1863e-01, PNorm = 55.4542, GNorm = 2.0722, lr_0 = 5.2831e-04
Loss = 4.2713e-01, PNorm = 55.4602, GNorm = 1.3321, lr_0 = 5.2795e-04
Loss = 4.3648e-01, PNorm = 55.4758, GNorm = 1.7920, lr_0 = 5.2758e-04
Loss = 4.4268e-01, PNorm = 55.4807, GNorm = 1.6589, lr_0 = 5.2722e-04
Loss = 4.4005e-01, PNorm = 55.4856, GNorm = 1.8469, lr_0 = 5.2686e-04
Loss = 4.4774e-01, PNorm = 55.4936, GNorm = 1.2129, lr_0 = 5.2650e-04
Loss = 4.3830e-01, PNorm = 55.5071, GNorm = 1.6150, lr_0 = 5.2614e-04
Loss = 4.3656e-01, PNorm = 55.5221, GNorm = 1.0918, lr_0 = 5.2578e-04
Loss = 4.3401e-01, PNorm = 55.5309, GNorm = 1.2144, lr_0 = 5.2542e-04
Loss = 4.7894e-01, PNorm = 55.5434, GNorm = 1.5647, lr_0 = 5.2506e-04
Loss = 5.4500e-01, PNorm = 55.5455, GNorm = 1.7229, lr_0 = 5.2470e-04
Loss = 4.4037e-01, PNorm = 55.5577, GNorm = 1.2941, lr_0 = 5.2434e-04
Loss = 3.5082e-01, PNorm = 55.5663, GNorm = 1.0508, lr_0 = 5.2398e-04
Loss = 3.8146e-01, PNorm = 55.5713, GNorm = 1.3927, lr_0 = 5.2362e-04
Loss = 3.7297e-01, PNorm = 55.5810, GNorm = 1.1219, lr_0 = 5.2326e-04
Loss = 3.7566e-01, PNorm = 55.5876, GNorm = 1.3083, lr_0 = 5.2290e-04
Loss = 4.2911e-01, PNorm = 55.5951, GNorm = 1.4773, lr_0 = 5.2255e-04
Loss = 4.4145e-01, PNorm = 55.6035, GNorm = 1.2903, lr_0 = 5.2219e-04
Loss = 5.0386e-01, PNorm = 55.6192, GNorm = 1.2640, lr_0 = 5.2183e-04
Loss = 4.2561e-01, PNorm = 55.6278, GNorm = 1.7056, lr_0 = 5.2147e-04
Loss = 3.5138e-01, PNorm = 55.6379, GNorm = 1.3983, lr_0 = 5.2112e-04
Loss = 3.9279e-01, PNorm = 55.6430, GNorm = 1.6219, lr_0 = 5.2076e-04
Loss = 4.3044e-01, PNorm = 55.6488, GNorm = 1.2999, lr_0 = 5.2040e-04
Loss = 4.6254e-01, PNorm = 55.6592, GNorm = 1.8204, lr_0 = 5.2005e-04
Loss = 4.2780e-01, PNorm = 55.6680, GNorm = 1.5455, lr_0 = 5.1969e-04
Loss = 4.1705e-01, PNorm = 55.6739, GNorm = 0.9882, lr_0 = 5.1933e-04
Loss = 4.2451e-01, PNorm = 55.6817, GNorm = 2.2186, lr_0 = 5.1898e-04
Loss = 3.9995e-01, PNorm = 55.6895, GNorm = 1.2304, lr_0 = 5.1862e-04
Loss = 4.4046e-01, PNorm = 55.6965, GNorm = 1.5235, lr_0 = 5.1827e-04
Loss = 3.8305e-01, PNorm = 55.7060, GNorm = 0.9638, lr_0 = 5.1791e-04
Validation mae = 0.115520
Epoch 10
Loss = 4.2814e-01, PNorm = 55.7131, GNorm = 1.5910, lr_0 = 5.1756e-04
Loss = 4.0027e-01, PNorm = 55.7213, GNorm = 2.1822, lr_0 = 5.1720e-04
Loss = 3.8458e-01, PNorm = 55.7288, GNorm = 1.7068, lr_0 = 5.1685e-04
Loss = 4.8474e-01, PNorm = 55.7355, GNorm = 1.2887, lr_0 = 5.1649e-04
Loss = 4.9666e-01, PNorm = 55.7499, GNorm = 1.2818, lr_0 = 5.1614e-04
Loss = 4.2384e-01, PNorm = 55.7576, GNorm = 1.3540, lr_0 = 5.1579e-04
Loss = 4.3112e-01, PNorm = 55.7645, GNorm = 1.7377, lr_0 = 5.1543e-04
Loss = 4.2840e-01, PNorm = 55.7705, GNorm = 0.8657, lr_0 = 5.1508e-04
Loss = 4.1680e-01, PNorm = 55.7872, GNorm = 1.4413, lr_0 = 5.1473e-04
Loss = 3.9145e-01, PNorm = 55.7936, GNorm = 1.8613, lr_0 = 5.1437e-04
Loss = 4.8513e-01, PNorm = 55.8009, GNorm = 1.3774, lr_0 = 5.1402e-04
Loss = 3.7840e-01, PNorm = 55.8136, GNorm = 1.2493, lr_0 = 5.1367e-04
Loss = 4.1994e-01, PNorm = 55.8230, GNorm = 1.7496, lr_0 = 5.1332e-04
Loss = 3.8612e-01, PNorm = 55.8344, GNorm = 1.5152, lr_0 = 5.1297e-04
Loss = 4.3437e-01, PNorm = 55.8516, GNorm = 1.3470, lr_0 = 5.1262e-04
Loss = 3.9882e-01, PNorm = 55.8631, GNorm = 0.9798, lr_0 = 5.1226e-04
Loss = 4.2677e-01, PNorm = 55.8721, GNorm = 1.5998, lr_0 = 5.1191e-04
Loss = 4.2480e-01, PNorm = 55.8858, GNorm = 1.0313, lr_0 = 5.1156e-04
Loss = 3.7325e-01, PNorm = 55.8923, GNorm = 0.9870, lr_0 = 5.1121e-04
Loss = 4.2472e-01, PNorm = 55.8979, GNorm = 1.3888, lr_0 = 5.1086e-04
Loss = 3.9919e-01, PNorm = 55.9067, GNorm = 2.5982, lr_0 = 5.1051e-04
Loss = 4.1817e-01, PNorm = 55.9216, GNorm = 1.2582, lr_0 = 5.1016e-04
Loss = 4.3215e-01, PNorm = 55.9304, GNorm = 1.3027, lr_0 = 5.0981e-04
Loss = 3.9419e-01, PNorm = 55.9407, GNorm = 1.9524, lr_0 = 5.0946e-04
Loss = 3.8123e-01, PNorm = 55.9497, GNorm = 1.2761, lr_0 = 5.0911e-04
Loss = 3.9902e-01, PNorm = 55.9555, GNorm = 1.2584, lr_0 = 5.0877e-04
Loss = 4.1202e-01, PNorm = 55.9608, GNorm = 1.3635, lr_0 = 5.0842e-04
Loss = 5.1204e-01, PNorm = 55.9673, GNorm = 1.2644, lr_0 = 5.0807e-04
Loss = 4.5027e-01, PNorm = 55.9744, GNorm = 0.9858, lr_0 = 5.0772e-04
Loss = 4.4687e-01, PNorm = 55.9852, GNorm = 1.7941, lr_0 = 5.0737e-04
Loss = 4.1601e-01, PNorm = 55.9873, GNorm = 1.5246, lr_0 = 5.0703e-04
Loss = 3.7466e-01, PNorm = 55.9996, GNorm = 1.0715, lr_0 = 5.0668e-04
Loss = 4.1951e-01, PNorm = 56.0042, GNorm = 1.5596, lr_0 = 5.0633e-04
Loss = 4.1958e-01, PNorm = 56.0112, GNorm = 1.1828, lr_0 = 5.0598e-04
Loss = 4.2491e-01, PNorm = 56.0192, GNorm = 1.5920, lr_0 = 5.0564e-04
Loss = 4.1070e-01, PNorm = 56.0258, GNorm = 0.9922, lr_0 = 5.0529e-04
Loss = 4.9086e-01, PNorm = 56.0353, GNorm = 1.5060, lr_0 = 5.0494e-04
Loss = 3.9903e-01, PNorm = 56.0475, GNorm = 0.8743, lr_0 = 5.0460e-04
Loss = 3.8900e-01, PNorm = 56.0583, GNorm = 1.2207, lr_0 = 5.0425e-04
Loss = 4.3854e-01, PNorm = 56.0705, GNorm = 1.9236, lr_0 = 5.0391e-04
Loss = 3.9085e-01, PNorm = 56.0825, GNorm = 1.5490, lr_0 = 5.0356e-04
Loss = 4.9400e-01, PNorm = 56.0921, GNorm = 1.9440, lr_0 = 5.0322e-04
Loss = 4.5159e-01, PNorm = 56.1055, GNorm = 1.3775, lr_0 = 5.0287e-04
Loss = 4.0782e-01, PNorm = 56.1150, GNorm = 1.1470, lr_0 = 5.0253e-04
Loss = 5.4057e-01, PNorm = 56.1267, GNorm = 1.8261, lr_0 = 5.0218e-04
Loss = 4.5355e-01, PNorm = 56.1407, GNorm = 2.3063, lr_0 = 5.0184e-04
Loss = 4.3328e-01, PNorm = 56.1474, GNorm = 1.7719, lr_0 = 5.0150e-04
Loss = 4.5986e-01, PNorm = 56.1526, GNorm = 1.4180, lr_0 = 5.0115e-04
Loss = 4.2777e-01, PNorm = 56.1602, GNorm = 1.4403, lr_0 = 5.0081e-04
Loss = 4.1171e-01, PNorm = 56.1692, GNorm = 1.1252, lr_0 = 5.0047e-04
Loss = 4.6975e-01, PNorm = 56.1792, GNorm = 1.4229, lr_0 = 5.0012e-04
Loss = 3.6520e-01, PNorm = 56.1849, GNorm = 1.0719, lr_0 = 4.9978e-04
Loss = 3.7163e-01, PNorm = 56.1921, GNorm = 2.4677, lr_0 = 4.9944e-04
Loss = 4.3784e-01, PNorm = 56.1977, GNorm = 1.9359, lr_0 = 4.9910e-04
Loss = 3.8667e-01, PNorm = 56.2157, GNorm = 1.2669, lr_0 = 4.9875e-04
Loss = 4.3236e-01, PNorm = 56.2211, GNorm = 0.9459, lr_0 = 4.9841e-04
Loss = 4.4187e-01, PNorm = 56.2306, GNorm = 1.4252, lr_0 = 4.9807e-04
Loss = 3.9221e-01, PNorm = 56.2398, GNorm = 3.5052, lr_0 = 4.9773e-04
Loss = 4.3604e-01, PNorm = 56.2387, GNorm = 1.2004, lr_0 = 4.9739e-04
Loss = 4.2975e-01, PNorm = 56.2478, GNorm = 1.1319, lr_0 = 4.9705e-04
Loss = 3.7112e-01, PNorm = 56.2574, GNorm = 2.5119, lr_0 = 4.9671e-04
Loss = 3.7422e-01, PNorm = 56.2652, GNorm = 1.2834, lr_0 = 4.9637e-04
Loss = 3.9297e-01, PNorm = 56.2780, GNorm = 1.8666, lr_0 = 4.9603e-04
Loss = 3.9569e-01, PNorm = 56.2884, GNorm = 2.1733, lr_0 = 4.9569e-04
Loss = 4.3198e-01, PNorm = 56.2901, GNorm = 1.2546, lr_0 = 4.9535e-04
Loss = 3.7535e-01, PNorm = 56.2933, GNorm = 1.2350, lr_0 = 4.9501e-04
Loss = 3.8809e-01, PNorm = 56.2979, GNorm = 1.2622, lr_0 = 4.9467e-04
Loss = 4.6778e-01, PNorm = 56.3123, GNorm = 1.5669, lr_0 = 4.9433e-04
Loss = 4.1799e-01, PNorm = 56.3244, GNorm = 1.4362, lr_0 = 4.9399e-04
Loss = 4.9346e-01, PNorm = 56.3300, GNorm = 2.3259, lr_0 = 4.9365e-04
Loss = 4.4605e-01, PNorm = 56.3361, GNorm = 1.3123, lr_0 = 4.9332e-04
Loss = 3.8960e-01, PNorm = 56.3513, GNorm = 1.5757, lr_0 = 4.9298e-04
Loss = 4.2805e-01, PNorm = 56.3646, GNorm = 3.6733, lr_0 = 4.9264e-04
Loss = 4.4497e-01, PNorm = 56.3736, GNorm = 2.9130, lr_0 = 4.9230e-04
Loss = 4.0138e-01, PNorm = 56.3787, GNorm = 1.1397, lr_0 = 4.9197e-04
Loss = 3.9733e-01, PNorm = 56.3821, GNorm = 1.6699, lr_0 = 4.9163e-04
Loss = 4.0719e-01, PNorm = 56.3868, GNorm = 0.9631, lr_0 = 4.9129e-04
Loss = 4.3710e-01, PNorm = 56.3988, GNorm = 1.7520, lr_0 = 4.9095e-04
Loss = 3.8654e-01, PNorm = 56.4094, GNorm = 1.1985, lr_0 = 4.9062e-04
Loss = 4.1384e-01, PNorm = 56.4242, GNorm = 1.1808, lr_0 = 4.9028e-04
Loss = 3.3451e-01, PNorm = 56.4305, GNorm = 1.1751, lr_0 = 4.8995e-04
Loss = 4.2655e-01, PNorm = 56.4369, GNorm = 1.2103, lr_0 = 4.8961e-04
Loss = 4.2746e-01, PNorm = 56.4415, GNorm = 1.4014, lr_0 = 4.8928e-04
Loss = 4.3983e-01, PNorm = 56.4486, GNorm = 1.5821, lr_0 = 4.8894e-04
Loss = 4.2844e-01, PNorm = 56.4550, GNorm = 1.2093, lr_0 = 4.8861e-04
Loss = 4.1565e-01, PNorm = 56.4672, GNorm = 1.3781, lr_0 = 4.8827e-04
Loss = 4.4506e-01, PNorm = 56.4742, GNorm = 1.4973, lr_0 = 4.8794e-04
Loss = 3.9382e-01, PNorm = 56.4795, GNorm = 1.5977, lr_0 = 4.8760e-04
Loss = 3.5937e-01, PNorm = 56.4929, GNorm = 1.3802, lr_0 = 4.8727e-04
Loss = 4.4157e-01, PNorm = 56.4967, GNorm = 3.0005, lr_0 = 4.8693e-04
Loss = 3.6784e-01, PNorm = 56.5087, GNorm = 1.1535, lr_0 = 4.8660e-04
Loss = 4.5837e-01, PNorm = 56.5215, GNorm = 1.1800, lr_0 = 4.8627e-04
Loss = 3.4863e-01, PNorm = 56.5350, GNorm = 1.1312, lr_0 = 4.8593e-04
Loss = 4.8082e-01, PNorm = 56.5397, GNorm = 2.4469, lr_0 = 4.8560e-04
Loss = 4.0966e-01, PNorm = 56.5541, GNorm = 1.2527, lr_0 = 4.8527e-04
Loss = 3.9115e-01, PNorm = 56.5603, GNorm = 2.0712, lr_0 = 4.8494e-04
Loss = 4.3643e-01, PNorm = 56.5701, GNorm = 1.8660, lr_0 = 4.8460e-04
Loss = 3.7880e-01, PNorm = 56.5753, GNorm = 1.4061, lr_0 = 4.8427e-04
Loss = 3.6183e-01, PNorm = 56.5847, GNorm = 1.4194, lr_0 = 4.8394e-04
Loss = 4.1998e-01, PNorm = 56.5876, GNorm = 1.1497, lr_0 = 4.8361e-04
Loss = 4.3545e-01, PNorm = 56.5967, GNorm = 1.1529, lr_0 = 4.8328e-04
Loss = 3.9599e-01, PNorm = 56.6058, GNorm = 2.1120, lr_0 = 4.8295e-04
Loss = 4.0903e-01, PNorm = 56.6109, GNorm = 1.1994, lr_0 = 4.8262e-04
Loss = 3.9394e-01, PNorm = 56.6224, GNorm = 1.2487, lr_0 = 4.8228e-04
Loss = 3.9235e-01, PNorm = 56.6305, GNorm = 1.5068, lr_0 = 4.8195e-04
Loss = 3.7838e-01, PNorm = 56.6402, GNorm = 1.4198, lr_0 = 4.8162e-04
Loss = 3.9914e-01, PNorm = 56.6462, GNorm = 1.5520, lr_0 = 4.8129e-04
Loss = 4.1054e-01, PNorm = 56.6541, GNorm = 0.8296, lr_0 = 4.8096e-04
Loss = 3.7394e-01, PNorm = 56.6612, GNorm = 1.1576, lr_0 = 4.8064e-04
Loss = 4.0975e-01, PNorm = 56.6672, GNorm = 1.1324, lr_0 = 4.8031e-04
Loss = 3.7617e-01, PNorm = 56.6701, GNorm = 1.3761, lr_0 = 4.7998e-04
Loss = 4.5489e-01, PNorm = 56.6763, GNorm = 1.7443, lr_0 = 4.7965e-04
Loss = 4.3473e-01, PNorm = 56.6874, GNorm = 1.4697, lr_0 = 4.7932e-04
Loss = 4.0978e-01, PNorm = 56.7000, GNorm = 1.1575, lr_0 = 4.7899e-04
Loss = 4.3509e-01, PNorm = 56.7094, GNorm = 1.9104, lr_0 = 4.7866e-04
Loss = 3.6301e-01, PNorm = 56.7162, GNorm = 1.5037, lr_0 = 4.7833e-04
Loss = 4.3019e-01, PNorm = 56.7249, GNorm = 0.9821, lr_0 = 4.7801e-04
Loss = 4.2632e-01, PNorm = 56.7333, GNorm = 2.2155, lr_0 = 4.7768e-04
Loss = 3.7338e-01, PNorm = 56.7381, GNorm = 1.5448, lr_0 = 4.7735e-04
Loss = 4.0298e-01, PNorm = 56.7368, GNorm = 1.1155, lr_0 = 4.7703e-04
Validation mae = 0.116665
Epoch 11
Loss = 3.9717e-01, PNorm = 56.7417, GNorm = 1.9418, lr_0 = 4.7670e-04
Loss = 4.3960e-01, PNorm = 56.7534, GNorm = 1.5348, lr_0 = 4.7637e-04
Loss = 4.5251e-01, PNorm = 56.7631, GNorm = 2.2686, lr_0 = 4.7605e-04
Loss = 3.5496e-01, PNorm = 56.7754, GNorm = 2.1815, lr_0 = 4.7572e-04
Loss = 4.0856e-01, PNorm = 56.7832, GNorm = 1.6108, lr_0 = 4.7539e-04
Loss = 4.1155e-01, PNorm = 56.7930, GNorm = 1.3764, lr_0 = 4.7507e-04
Loss = 3.9573e-01, PNorm = 56.7985, GNorm = 1.5062, lr_0 = 4.7474e-04
Loss = 3.5403e-01, PNorm = 56.8031, GNorm = 1.5501, lr_0 = 4.7442e-04
Loss = 4.3286e-01, PNorm = 56.8146, GNorm = 1.2008, lr_0 = 4.7409e-04
Loss = 3.5906e-01, PNorm = 56.8225, GNorm = 1.3146, lr_0 = 4.7377e-04
Loss = 4.2739e-01, PNorm = 56.8303, GNorm = 1.1864, lr_0 = 4.7344e-04
Loss = 4.7107e-01, PNorm = 56.8377, GNorm = 2.1548, lr_0 = 4.7312e-04
Loss = 4.5735e-01, PNorm = 56.8438, GNorm = 1.2727, lr_0 = 4.7279e-04
Loss = 4.4860e-01, PNorm = 56.8489, GNorm = 1.7609, lr_0 = 4.7247e-04
Loss = 4.2562e-01, PNorm = 56.8591, GNorm = 1.2869, lr_0 = 4.7215e-04
Loss = 3.6760e-01, PNorm = 56.8689, GNorm = 1.0728, lr_0 = 4.7182e-04
Loss = 3.6852e-01, PNorm = 56.8815, GNorm = 1.4198, lr_0 = 4.7150e-04
Loss = 4.2111e-01, PNorm = 56.8868, GNorm = 1.5898, lr_0 = 4.7118e-04
Loss = 4.2393e-01, PNorm = 56.8944, GNorm = 1.7635, lr_0 = 4.7085e-04
Loss = 3.5000e-01, PNorm = 56.8994, GNorm = 0.9842, lr_0 = 4.7053e-04
Loss = 4.3681e-01, PNorm = 56.9084, GNorm = 2.6233, lr_0 = 4.7021e-04
Loss = 4.3548e-01, PNorm = 56.9224, GNorm = 1.5266, lr_0 = 4.6989e-04
Loss = 3.8483e-01, PNorm = 56.9366, GNorm = 0.9171, lr_0 = 4.6957e-04
Loss = 3.7975e-01, PNorm = 56.9483, GNorm = 1.1685, lr_0 = 4.6924e-04
Loss = 3.6128e-01, PNorm = 56.9570, GNorm = 1.4802, lr_0 = 4.6892e-04
Loss = 4.3685e-01, PNorm = 56.9641, GNorm = 1.4268, lr_0 = 4.6860e-04
Loss = 4.3153e-01, PNorm = 56.9759, GNorm = 1.4644, lr_0 = 4.6828e-04
Loss = 3.6876e-01, PNorm = 56.9819, GNorm = 1.6956, lr_0 = 4.6796e-04
Loss = 3.5783e-01, PNorm = 56.9884, GNorm = 1.6243, lr_0 = 4.6764e-04
Loss = 3.5807e-01, PNorm = 56.9968, GNorm = 1.0632, lr_0 = 4.6732e-04
Loss = 4.9647e-01, PNorm = 57.0004, GNorm = 1.4750, lr_0 = 4.6700e-04
Loss = 3.8939e-01, PNorm = 57.0067, GNorm = 1.2816, lr_0 = 4.6668e-04
Loss = 4.1624e-01, PNorm = 57.0106, GNorm = 1.5169, lr_0 = 4.6636e-04
Loss = 4.1082e-01, PNorm = 57.0213, GNorm = 1.2856, lr_0 = 4.6604e-04
Loss = 4.3785e-01, PNorm = 57.0321, GNorm = 1.7398, lr_0 = 4.6572e-04
Loss = 3.5384e-01, PNorm = 57.0437, GNorm = 1.0008, lr_0 = 4.6540e-04
Loss = 4.5943e-01, PNorm = 57.0553, GNorm = 1.5054, lr_0 = 4.6508e-04
Loss = 4.0142e-01, PNorm = 57.0609, GNorm = 1.0421, lr_0 = 4.6476e-04
Loss = 4.4861e-01, PNorm = 57.0699, GNorm = 1.7757, lr_0 = 4.6445e-04
Loss = 4.0062e-01, PNorm = 57.0750, GNorm = 0.8222, lr_0 = 4.6413e-04
Loss = 3.8514e-01, PNorm = 57.0845, GNorm = 1.8600, lr_0 = 4.6381e-04
Loss = 4.0522e-01, PNorm = 57.0969, GNorm = 1.3179, lr_0 = 4.6349e-04
Loss = 3.9642e-01, PNorm = 57.1077, GNorm = 1.2293, lr_0 = 4.6317e-04
Loss = 4.4061e-01, PNorm = 57.1120, GNorm = 3.7603, lr_0 = 4.6286e-04
Loss = 4.4996e-01, PNorm = 57.1204, GNorm = 1.6123, lr_0 = 4.6254e-04
Loss = 4.3351e-01, PNorm = 57.1311, GNorm = 1.3706, lr_0 = 4.6222e-04
Loss = 3.5940e-01, PNorm = 57.1410, GNorm = 1.2924, lr_0 = 4.6191e-04
Loss = 4.1356e-01, PNorm = 57.1505, GNorm = 1.5843, lr_0 = 4.6159e-04
Loss = 4.1165e-01, PNorm = 57.1498, GNorm = 1.1805, lr_0 = 4.6127e-04
Loss = 3.7303e-01, PNorm = 57.1590, GNorm = 1.5290, lr_0 = 4.6096e-04
Loss = 4.1963e-01, PNorm = 57.1726, GNorm = 1.2283, lr_0 = 4.6064e-04
Loss = 3.6391e-01, PNorm = 57.1800, GNorm = 1.3196, lr_0 = 4.6033e-04
Loss = 3.8448e-01, PNorm = 57.1909, GNorm = 1.3656, lr_0 = 4.6001e-04
Loss = 4.0794e-01, PNorm = 57.2015, GNorm = 2.3761, lr_0 = 4.5970e-04
Loss = 4.1078e-01, PNorm = 57.2118, GNorm = 1.2432, lr_0 = 4.5938e-04
Loss = 4.6204e-01, PNorm = 57.2160, GNorm = 1.5529, lr_0 = 4.5907e-04
Loss = 4.0794e-01, PNorm = 57.2329, GNorm = 2.7075, lr_0 = 4.5875e-04
Loss = 3.7922e-01, PNorm = 57.2410, GNorm = 0.9142, lr_0 = 4.5844e-04
Loss = 3.6952e-01, PNorm = 57.2460, GNorm = 1.0115, lr_0 = 4.5812e-04
Loss = 4.2374e-01, PNorm = 57.2522, GNorm = 2.4046, lr_0 = 4.5781e-04
Loss = 5.0007e-01, PNorm = 57.2589, GNorm = 1.8837, lr_0 = 4.5750e-04
Loss = 4.2367e-01, PNorm = 57.2649, GNorm = 1.4013, lr_0 = 4.5718e-04
Loss = 4.2680e-01, PNorm = 57.2750, GNorm = 1.4678, lr_0 = 4.5687e-04
Loss = 3.8619e-01, PNorm = 57.2836, GNorm = 1.2001, lr_0 = 4.5656e-04
Loss = 4.1514e-01, PNorm = 57.2965, GNorm = 1.7736, lr_0 = 4.5624e-04
Loss = 4.1446e-01, PNorm = 57.3041, GNorm = 1.6943, lr_0 = 4.5593e-04
Loss = 4.5011e-01, PNorm = 57.3166, GNorm = 1.4844, lr_0 = 4.5562e-04
Loss = 3.8558e-01, PNorm = 57.3214, GNorm = 1.3914, lr_0 = 4.5531e-04
Loss = 4.4872e-01, PNorm = 57.3265, GNorm = 2.0191, lr_0 = 4.5499e-04
Loss = 3.9115e-01, PNorm = 57.3317, GNorm = 1.3211, lr_0 = 4.5468e-04
Loss = 3.6282e-01, PNorm = 57.3380, GNorm = 1.1214, lr_0 = 4.5437e-04
Loss = 4.0415e-01, PNorm = 57.3467, GNorm = 1.0674, lr_0 = 4.5406e-04
Loss = 4.0712e-01, PNorm = 57.3492, GNorm = 1.1126, lr_0 = 4.5375e-04
Loss = 4.0304e-01, PNorm = 57.3555, GNorm = 1.2755, lr_0 = 4.5344e-04
Loss = 4.7351e-01, PNorm = 57.3542, GNorm = 1.5942, lr_0 = 4.5313e-04
Loss = 3.8772e-01, PNorm = 57.3597, GNorm = 1.2036, lr_0 = 4.5282e-04
Loss = 4.2553e-01, PNorm = 57.3664, GNorm = 1.2104, lr_0 = 4.5251e-04
Loss = 4.0794e-01, PNorm = 57.3750, GNorm = 1.9853, lr_0 = 4.5220e-04
Loss = 4.1349e-01, PNorm = 57.3775, GNorm = 1.2297, lr_0 = 4.5189e-04
Loss = 3.7780e-01, PNorm = 57.3881, GNorm = 1.6428, lr_0 = 4.5158e-04
Loss = 4.2404e-01, PNorm = 57.3993, GNorm = 1.8709, lr_0 = 4.5127e-04
Loss = 4.1340e-01, PNorm = 57.4095, GNorm = 1.4611, lr_0 = 4.5096e-04
Loss = 3.6914e-01, PNorm = 57.4195, GNorm = 1.4336, lr_0 = 4.5065e-04
Loss = 4.0211e-01, PNorm = 57.4255, GNorm = 1.1767, lr_0 = 4.5034e-04
Loss = 3.9925e-01, PNorm = 57.4338, GNorm = 1.1899, lr_0 = 4.5003e-04
Loss = 3.7614e-01, PNorm = 57.4449, GNorm = 1.2292, lr_0 = 4.4972e-04
Loss = 4.2914e-01, PNorm = 57.4447, GNorm = 1.7212, lr_0 = 4.4942e-04
Loss = 4.1302e-01, PNorm = 57.4524, GNorm = 1.0504, lr_0 = 4.4911e-04
Loss = 3.6753e-01, PNorm = 57.4596, GNorm = 1.5596, lr_0 = 4.4880e-04
Loss = 4.4067e-01, PNorm = 57.4662, GNorm = 1.7021, lr_0 = 4.4849e-04
Loss = 4.2361e-01, PNorm = 57.4768, GNorm = 1.5451, lr_0 = 4.4819e-04
Loss = 4.1137e-01, PNorm = 57.4791, GNorm = 1.4975, lr_0 = 4.4788e-04
Loss = 4.2529e-01, PNorm = 57.4825, GNorm = 1.2310, lr_0 = 4.4757e-04
Loss = 4.4651e-01, PNorm = 57.4848, GNorm = 1.5655, lr_0 = 4.4727e-04
Loss = 4.5317e-01, PNorm = 57.4897, GNorm = 1.8953, lr_0 = 4.4696e-04
Loss = 3.9585e-01, PNorm = 57.5029, GNorm = 1.7974, lr_0 = 4.4665e-04
Loss = 4.1420e-01, PNorm = 57.5127, GNorm = 1.2879, lr_0 = 4.4635e-04
Loss = 4.6618e-01, PNorm = 57.5197, GNorm = 2.1963, lr_0 = 4.4604e-04
Loss = 3.8870e-01, PNorm = 57.5257, GNorm = 1.2777, lr_0 = 4.4574e-04
Loss = 4.3776e-01, PNorm = 57.5335, GNorm = 1.3847, lr_0 = 4.4543e-04
Loss = 3.9408e-01, PNorm = 57.5437, GNorm = 1.0668, lr_0 = 4.4513e-04
Loss = 4.3301e-01, PNorm = 57.5551, GNorm = 1.5257, lr_0 = 4.4482e-04
Loss = 4.0817e-01, PNorm = 57.5597, GNorm = 1.1901, lr_0 = 4.4452e-04
Loss = 4.2862e-01, PNorm = 57.5656, GNorm = 1.5054, lr_0 = 4.4421e-04
Loss = 4.0687e-01, PNorm = 57.5730, GNorm = 1.4199, lr_0 = 4.4391e-04
Loss = 4.2462e-01, PNorm = 57.5779, GNorm = 2.2040, lr_0 = 4.4360e-04
Loss = 4.4406e-01, PNorm = 57.5819, GNorm = 1.5421, lr_0 = 4.4330e-04
Loss = 3.9772e-01, PNorm = 57.5891, GNorm = 1.2355, lr_0 = 4.4299e-04
Loss = 4.1752e-01, PNorm = 57.5966, GNorm = 1.6070, lr_0 = 4.4269e-04
Loss = 3.4294e-01, PNorm = 57.6045, GNorm = 1.5841, lr_0 = 4.4239e-04
Loss = 3.8299e-01, PNorm = 57.6074, GNorm = 1.1567, lr_0 = 4.4209e-04
Loss = 4.1888e-01, PNorm = 57.6159, GNorm = 1.2554, lr_0 = 4.4178e-04
Loss = 4.9032e-01, PNorm = 57.6215, GNorm = 1.7959, lr_0 = 4.4148e-04
Loss = 3.9898e-01, PNorm = 57.6270, GNorm = 1.2681, lr_0 = 4.4118e-04
Loss = 3.8076e-01, PNorm = 57.6379, GNorm = 0.9163, lr_0 = 4.4088e-04
Loss = 4.1282e-01, PNorm = 57.6448, GNorm = 1.1539, lr_0 = 4.4057e-04
Loss = 4.6961e-01, PNorm = 57.6502, GNorm = 1.4328, lr_0 = 4.4027e-04
Loss = 4.0251e-01, PNorm = 57.6580, GNorm = 1.7084, lr_0 = 4.3997e-04
Loss = 3.6377e-01, PNorm = 57.6634, GNorm = 1.2408, lr_0 = 4.3967e-04
Loss = 4.8317e-01, PNorm = 57.6676, GNorm = 1.3407, lr_0 = 4.3937e-04
Validation mae = 0.114579
Epoch 12
Loss = 3.6584e-01, PNorm = 57.6766, GNorm = 1.5348, lr_0 = 4.3907e-04
Loss = 3.8722e-01, PNorm = 57.6852, GNorm = 1.3040, lr_0 = 4.3877e-04
Loss = 3.4825e-01, PNorm = 57.6948, GNorm = 1.0656, lr_0 = 4.3846e-04
Loss = 4.2345e-01, PNorm = 57.7020, GNorm = 1.6501, lr_0 = 4.3816e-04
Loss = 4.1528e-01, PNorm = 57.7086, GNorm = 1.6977, lr_0 = 4.3786e-04
Loss = 3.7343e-01, PNorm = 57.7137, GNorm = 1.6689, lr_0 = 4.3756e-04
Loss = 3.7581e-01, PNorm = 57.7247, GNorm = 1.2066, lr_0 = 4.3726e-04
Loss = 4.0936e-01, PNorm = 57.7407, GNorm = 1.6284, lr_0 = 4.3696e-04
Loss = 4.1623e-01, PNorm = 57.7480, GNorm = 2.1229, lr_0 = 4.3667e-04
Loss = 3.8703e-01, PNorm = 57.7574, GNorm = 1.2967, lr_0 = 4.3637e-04
Loss = 3.5092e-01, PNorm = 57.7716, GNorm = 1.9197, lr_0 = 4.3607e-04
Loss = 4.0399e-01, PNorm = 57.7768, GNorm = 1.6825, lr_0 = 4.3577e-04
Loss = 3.3613e-01, PNorm = 57.7869, GNorm = 1.2742, lr_0 = 4.3547e-04
Loss = 4.0745e-01, PNorm = 57.7899, GNorm = 1.3400, lr_0 = 4.3517e-04
Loss = 4.1854e-01, PNorm = 57.7950, GNorm = 1.3449, lr_0 = 4.3487e-04
Loss = 4.3194e-01, PNorm = 57.7965, GNorm = 1.2690, lr_0 = 4.3458e-04
Loss = 3.7887e-01, PNorm = 57.8016, GNorm = 1.4904, lr_0 = 4.3428e-04
Loss = 3.9762e-01, PNorm = 57.8071, GNorm = 1.2189, lr_0 = 4.3398e-04
Loss = 4.7735e-01, PNorm = 57.8109, GNorm = 2.2162, lr_0 = 4.3368e-04
Loss = 4.2691e-01, PNorm = 57.8201, GNorm = 1.7663, lr_0 = 4.3339e-04
Loss = 4.0185e-01, PNorm = 57.8285, GNorm = 1.4925, lr_0 = 4.3309e-04
Loss = 3.6651e-01, PNorm = 57.8376, GNorm = 1.3909, lr_0 = 4.3279e-04
Loss = 3.7596e-01, PNorm = 57.8464, GNorm = 1.7642, lr_0 = 4.3250e-04
Loss = 4.7400e-01, PNorm = 57.8540, GNorm = 1.4754, lr_0 = 4.3220e-04
Loss = 3.8460e-01, PNorm = 57.8622, GNorm = 2.1474, lr_0 = 4.3190e-04
Loss = 4.0952e-01, PNorm = 57.8648, GNorm = 1.3797, lr_0 = 4.3161e-04
Loss = 3.7186e-01, PNorm = 57.8696, GNorm = 1.0339, lr_0 = 4.3131e-04
Loss = 3.5683e-01, PNorm = 57.8764, GNorm = 0.9477, lr_0 = 4.3102e-04
Loss = 4.0786e-01, PNorm = 57.8846, GNorm = 1.5419, lr_0 = 4.3072e-04
Loss = 4.3669e-01, PNorm = 57.8904, GNorm = 1.5276, lr_0 = 4.3043e-04
Loss = 4.0434e-01, PNorm = 57.8941, GNorm = 1.3455, lr_0 = 4.3013e-04
Loss = 3.8888e-01, PNorm = 57.9032, GNorm = 1.5877, lr_0 = 4.2984e-04
Loss = 3.6540e-01, PNorm = 57.9058, GNorm = 1.3027, lr_0 = 4.2954e-04
Loss = 3.5006e-01, PNorm = 57.9143, GNorm = 1.9664, lr_0 = 4.2925e-04
Loss = 3.9485e-01, PNorm = 57.9192, GNorm = 1.5696, lr_0 = 4.2895e-04
Loss = 4.7798e-01, PNorm = 57.9301, GNorm = 2.6503, lr_0 = 4.2866e-04
Loss = 4.3643e-01, PNorm = 57.9363, GNorm = 2.1984, lr_0 = 4.2837e-04
Loss = 4.4784e-01, PNorm = 57.9470, GNorm = 1.4386, lr_0 = 4.2807e-04
Loss = 4.4340e-01, PNorm = 57.9569, GNorm = 1.6379, lr_0 = 4.2778e-04
Loss = 4.1368e-01, PNorm = 57.9687, GNorm = 2.4923, lr_0 = 4.2749e-04
Loss = 4.0256e-01, PNorm = 57.9773, GNorm = 1.1651, lr_0 = 4.2719e-04
Loss = 4.0103e-01, PNorm = 57.9823, GNorm = 1.1641, lr_0 = 4.2690e-04
Loss = 4.2482e-01, PNorm = 57.9869, GNorm = 1.3768, lr_0 = 4.2661e-04
Loss = 3.9584e-01, PNorm = 57.9957, GNorm = 1.2449, lr_0 = 4.2632e-04
Loss = 4.0131e-01, PNorm = 57.9963, GNorm = 1.2205, lr_0 = 4.2602e-04
Loss = 5.0727e-01, PNorm = 58.0016, GNorm = 1.6295, lr_0 = 4.2573e-04
Loss = 4.2209e-01, PNorm = 58.0074, GNorm = 1.2832, lr_0 = 4.2544e-04
Loss = 3.8879e-01, PNorm = 58.0123, GNorm = 1.1228, lr_0 = 4.2515e-04
Loss = 4.1668e-01, PNorm = 58.0152, GNorm = 1.5876, lr_0 = 4.2486e-04
Loss = 4.3869e-01, PNorm = 58.0209, GNorm = 1.7995, lr_0 = 4.2457e-04
Loss = 4.0416e-01, PNorm = 58.0259, GNorm = 1.2694, lr_0 = 4.2428e-04
Loss = 3.9056e-01, PNorm = 58.0362, GNorm = 1.3826, lr_0 = 4.2399e-04
Loss = 4.3658e-01, PNorm = 58.0385, GNorm = 2.6404, lr_0 = 4.2370e-04
Loss = 3.5516e-01, PNorm = 58.0429, GNorm = 0.9386, lr_0 = 4.2340e-04
Loss = 3.6601e-01, PNorm = 58.0491, GNorm = 1.6219, lr_0 = 4.2311e-04
Loss = 3.9552e-01, PNorm = 58.0593, GNorm = 1.3970, lr_0 = 4.2283e-04
Loss = 4.2735e-01, PNorm = 58.0736, GNorm = 1.5721, lr_0 = 4.2254e-04
Loss = 4.9192e-01, PNorm = 58.0838, GNorm = 1.5793, lr_0 = 4.2225e-04
Loss = 4.2013e-01, PNorm = 58.0899, GNorm = 1.0048, lr_0 = 4.2196e-04
Loss = 3.9015e-01, PNorm = 58.0953, GNorm = 1.4491, lr_0 = 4.2167e-04
Loss = 3.7966e-01, PNorm = 58.1011, GNorm = 2.2583, lr_0 = 4.2138e-04
Loss = 3.7153e-01, PNorm = 58.1039, GNorm = 0.9620, lr_0 = 4.2109e-04
Loss = 4.2195e-01, PNorm = 58.1146, GNorm = 1.7351, lr_0 = 4.2080e-04
Loss = 4.0699e-01, PNorm = 58.1200, GNorm = 1.1606, lr_0 = 4.2051e-04
Loss = 3.9915e-01, PNorm = 58.1285, GNorm = 1.7320, lr_0 = 4.2023e-04
Loss = 3.9968e-01, PNorm = 58.1373, GNorm = 1.2415, lr_0 = 4.1994e-04
Loss = 3.3872e-01, PNorm = 58.1428, GNorm = 1.1760, lr_0 = 4.1965e-04
Loss = 3.7706e-01, PNorm = 58.1482, GNorm = 1.1618, lr_0 = 4.1936e-04
Loss = 4.3621e-01, PNorm = 58.1586, GNorm = 1.5081, lr_0 = 4.1907e-04
Loss = 3.9494e-01, PNorm = 58.1626, GNorm = 1.1374, lr_0 = 4.1879e-04
Loss = 4.3595e-01, PNorm = 58.1668, GNorm = 1.0300, lr_0 = 4.1850e-04
Loss = 4.6196e-01, PNorm = 58.1739, GNorm = 3.8174, lr_0 = 4.1821e-04
Loss = 4.4563e-01, PNorm = 58.1759, GNorm = 1.2343, lr_0 = 4.1793e-04
Loss = 3.8071e-01, PNorm = 58.1870, GNorm = 1.4234, lr_0 = 4.1764e-04
Loss = 4.4976e-01, PNorm = 58.1923, GNorm = 2.5816, lr_0 = 4.1736e-04
Loss = 3.9806e-01, PNorm = 58.2080, GNorm = 1.2729, lr_0 = 4.1707e-04
Loss = 3.6360e-01, PNorm = 58.2129, GNorm = 1.0719, lr_0 = 4.1678e-04
Loss = 3.8111e-01, PNorm = 58.2155, GNorm = 1.7617, lr_0 = 4.1650e-04
Loss = 3.8299e-01, PNorm = 58.2205, GNorm = 2.4029, lr_0 = 4.1621e-04
Loss = 4.2448e-01, PNorm = 58.2252, GNorm = 1.9408, lr_0 = 4.1593e-04
Loss = 4.1862e-01, PNorm = 58.2315, GNorm = 1.5177, lr_0 = 4.1564e-04
Loss = 3.5130e-01, PNorm = 58.2339, GNorm = 1.0327, lr_0 = 4.1536e-04
Loss = 3.2393e-01, PNorm = 58.2418, GNorm = 1.5179, lr_0 = 4.1507e-04
Loss = 3.8985e-01, PNorm = 58.2448, GNorm = 1.6337, lr_0 = 4.1479e-04
Loss = 3.7720e-01, PNorm = 58.2552, GNorm = 2.7422, lr_0 = 4.1450e-04
Loss = 4.0370e-01, PNorm = 58.2576, GNorm = 1.6522, lr_0 = 4.1422e-04
Loss = 3.7500e-01, PNorm = 58.2612, GNorm = 1.2555, lr_0 = 4.1394e-04
Loss = 3.8616e-01, PNorm = 58.2628, GNorm = 1.0969, lr_0 = 4.1365e-04
Loss = 3.3163e-01, PNorm = 58.2684, GNorm = 1.0624, lr_0 = 4.1337e-04
Loss = 4.4375e-01, PNorm = 58.2725, GNorm = 1.1981, lr_0 = 4.1309e-04
Loss = 3.8636e-01, PNorm = 58.2769, GNorm = 1.1305, lr_0 = 4.1280e-04
Loss = 3.3542e-01, PNorm = 58.2809, GNorm = 0.9227, lr_0 = 4.1252e-04
Loss = 3.8568e-01, PNorm = 58.2831, GNorm = 1.4847, lr_0 = 4.1224e-04
Loss = 4.3355e-01, PNorm = 58.2895, GNorm = 1.7164, lr_0 = 4.1196e-04
Loss = 4.2462e-01, PNorm = 58.2970, GNorm = 0.9387, lr_0 = 4.1167e-04
Loss = 3.9838e-01, PNorm = 58.3058, GNorm = 1.5921, lr_0 = 4.1139e-04
Loss = 4.1113e-01, PNorm = 58.3095, GNorm = 1.5384, lr_0 = 4.1111e-04
Loss = 3.4791e-01, PNorm = 58.3171, GNorm = 1.4865, lr_0 = 4.1083e-04
Loss = 4.0921e-01, PNorm = 58.3235, GNorm = 1.7627, lr_0 = 4.1055e-04
Loss = 3.6309e-01, PNorm = 58.3322, GNorm = 1.4056, lr_0 = 4.1027e-04
Loss = 4.3614e-01, PNorm = 58.3407, GNorm = 1.3579, lr_0 = 4.0998e-04
Loss = 4.3450e-01, PNorm = 58.3469, GNorm = 2.5620, lr_0 = 4.0970e-04
Loss = 3.5094e-01, PNorm = 58.3555, GNorm = 1.3546, lr_0 = 4.0942e-04
Loss = 4.2409e-01, PNorm = 58.3578, GNorm = 1.2663, lr_0 = 4.0914e-04
Loss = 4.0179e-01, PNorm = 58.3617, GNorm = 1.1182, lr_0 = 4.0886e-04
Loss = 3.8162e-01, PNorm = 58.3680, GNorm = 2.0721, lr_0 = 4.0858e-04
Loss = 3.8194e-01, PNorm = 58.3754, GNorm = 1.8306, lr_0 = 4.0830e-04
Loss = 4.5813e-01, PNorm = 58.3837, GNorm = 1.2661, lr_0 = 4.0802e-04
Loss = 4.1114e-01, PNorm = 58.3873, GNorm = 1.3980, lr_0 = 4.0774e-04
Loss = 3.9898e-01, PNorm = 58.3961, GNorm = 1.3847, lr_0 = 4.0746e-04
Loss = 4.0930e-01, PNorm = 58.4021, GNorm = 1.1646, lr_0 = 4.0718e-04
Loss = 3.9584e-01, PNorm = 58.4095, GNorm = 1.5475, lr_0 = 4.0691e-04
Loss = 4.0866e-01, PNorm = 58.4141, GNorm = 2.1258, lr_0 = 4.0663e-04
Loss = 3.5902e-01, PNorm = 58.4192, GNorm = 1.7937, lr_0 = 4.0635e-04
Loss = 3.6829e-01, PNorm = 58.4258, GNorm = 1.7868, lr_0 = 4.0607e-04
Loss = 3.8830e-01, PNorm = 58.4309, GNorm = 1.4891, lr_0 = 4.0579e-04
Loss = 4.8276e-01, PNorm = 58.4387, GNorm = 1.0221, lr_0 = 4.0551e-04
Loss = 4.1793e-01, PNorm = 58.4450, GNorm = 2.2397, lr_0 = 4.0524e-04
Loss = 3.3652e-01, PNorm = 58.4493, GNorm = 1.0128, lr_0 = 4.0496e-04
Loss = 4.2547e-01, PNorm = 58.4533, GNorm = 1.7271, lr_0 = 4.0468e-04
Validation mae = 0.114157
Epoch 13
Loss = 4.4544e-01, PNorm = 58.4606, GNorm = 1.2832, lr_0 = 4.0440e-04
Loss = 3.6937e-01, PNorm = 58.4653, GNorm = 1.1890, lr_0 = 4.0413e-04
Loss = 3.8327e-01, PNorm = 58.4721, GNorm = 1.6324, lr_0 = 4.0385e-04
Loss = 3.7313e-01, PNorm = 58.4753, GNorm = 1.9957, lr_0 = 4.0357e-04
Loss = 3.7696e-01, PNorm = 58.4758, GNorm = 1.9813, lr_0 = 4.0330e-04
Loss = 3.8013e-01, PNorm = 58.4853, GNorm = 3.0062, lr_0 = 4.0302e-04
Loss = 3.6774e-01, PNorm = 58.4942, GNorm = 1.4891, lr_0 = 4.0274e-04
Loss = 3.8864e-01, PNorm = 58.5024, GNorm = 1.0469, lr_0 = 4.0247e-04
Loss = 4.0931e-01, PNorm = 58.5075, GNorm = 1.4048, lr_0 = 4.0219e-04
Loss = 4.0300e-01, PNorm = 58.5146, GNorm = 1.3358, lr_0 = 4.0192e-04
Loss = 3.5396e-01, PNorm = 58.5216, GNorm = 1.5219, lr_0 = 4.0164e-04
Loss = 3.8676e-01, PNorm = 58.5285, GNorm = 2.0465, lr_0 = 4.0137e-04
Loss = 4.0491e-01, PNorm = 58.5389, GNorm = 1.4623, lr_0 = 4.0109e-04
Loss = 3.7898e-01, PNorm = 58.5452, GNorm = 1.4634, lr_0 = 4.0082e-04
Loss = 4.0461e-01, PNorm = 58.5494, GNorm = 1.3208, lr_0 = 4.0054e-04
Loss = 3.7998e-01, PNorm = 58.5578, GNorm = 2.0415, lr_0 = 4.0027e-04
Loss = 3.4348e-01, PNorm = 58.5679, GNorm = 1.3646, lr_0 = 3.9999e-04
Loss = 3.6827e-01, PNorm = 58.5758, GNorm = 1.3501, lr_0 = 3.9972e-04
Loss = 3.7067e-01, PNorm = 58.5797, GNorm = 0.9604, lr_0 = 3.9945e-04
Loss = 3.7260e-01, PNorm = 58.5906, GNorm = 1.1101, lr_0 = 3.9917e-04
Loss = 3.7071e-01, PNorm = 58.5993, GNorm = 0.9647, lr_0 = 3.9890e-04
Loss = 4.1644e-01, PNorm = 58.6055, GNorm = 2.0371, lr_0 = 3.9863e-04
Loss = 4.4023e-01, PNorm = 58.6124, GNorm = 3.0565, lr_0 = 3.9835e-04
Loss = 4.1620e-01, PNorm = 58.6148, GNorm = 1.3382, lr_0 = 3.9808e-04
Loss = 3.7955e-01, PNorm = 58.6208, GNorm = 1.4322, lr_0 = 3.9781e-04
Loss = 4.1408e-01, PNorm = 58.6253, GNorm = 2.1991, lr_0 = 3.9753e-04
Loss = 4.7526e-01, PNorm = 58.6311, GNorm = 1.7155, lr_0 = 3.9726e-04
Loss = 3.7717e-01, PNorm = 58.6354, GNorm = 1.2197, lr_0 = 3.9699e-04
Loss = 4.4419e-01, PNorm = 58.6415, GNorm = 1.5767, lr_0 = 3.9672e-04
Loss = 3.9426e-01, PNorm = 58.6457, GNorm = 1.7907, lr_0 = 3.9645e-04
Loss = 3.9033e-01, PNorm = 58.6522, GNorm = 1.8605, lr_0 = 3.9617e-04
Loss = 4.0634e-01, PNorm = 58.6600, GNorm = 1.3323, lr_0 = 3.9590e-04
Loss = 4.7403e-01, PNorm = 58.6601, GNorm = 3.0553, lr_0 = 3.9563e-04
Loss = 3.9898e-01, PNorm = 58.6632, GNorm = 1.0403, lr_0 = 3.9536e-04
Loss = 3.7701e-01, PNorm = 58.6672, GNorm = 1.3678, lr_0 = 3.9509e-04
Loss = 3.8676e-01, PNorm = 58.6701, GNorm = 1.4919, lr_0 = 3.9482e-04
Loss = 3.8188e-01, PNorm = 58.6785, GNorm = 2.2288, lr_0 = 3.9455e-04
Loss = 4.2365e-01, PNorm = 58.6802, GNorm = 1.3496, lr_0 = 3.9428e-04
Loss = 4.1166e-01, PNorm = 58.6870, GNorm = 1.4927, lr_0 = 3.9401e-04
Loss = 3.5606e-01, PNorm = 58.6946, GNorm = 1.4881, lr_0 = 3.9374e-04
Loss = 4.0495e-01, PNorm = 58.7042, GNorm = 1.1744, lr_0 = 3.9347e-04
Loss = 3.7641e-01, PNorm = 58.7113, GNorm = 0.9972, lr_0 = 3.9320e-04
Loss = 3.8889e-01, PNorm = 58.7172, GNorm = 1.6198, lr_0 = 3.9293e-04
Loss = 4.0393e-01, PNorm = 58.7213, GNorm = 1.3893, lr_0 = 3.9266e-04
Loss = 4.0651e-01, PNorm = 58.7292, GNorm = 1.3159, lr_0 = 3.9239e-04
Loss = 3.3600e-01, PNorm = 58.7339, GNorm = 1.4694, lr_0 = 3.9212e-04
Loss = 3.9584e-01, PNorm = 58.7390, GNorm = 1.0064, lr_0 = 3.9185e-04
Loss = 3.9284e-01, PNorm = 58.7441, GNorm = 1.6156, lr_0 = 3.9159e-04
Loss = 4.1824e-01, PNorm = 58.7512, GNorm = 0.8879, lr_0 = 3.9132e-04
Loss = 4.0588e-01, PNorm = 58.7588, GNorm = 1.1203, lr_0 = 3.9105e-04
Loss = 3.8654e-01, PNorm = 58.7645, GNorm = 1.3865, lr_0 = 3.9078e-04
Loss = 3.5980e-01, PNorm = 58.7744, GNorm = 1.2521, lr_0 = 3.9051e-04
Loss = 3.8515e-01, PNorm = 58.7851, GNorm = 1.2338, lr_0 = 3.9025e-04
Loss = 3.9040e-01, PNorm = 58.7905, GNorm = 1.2695, lr_0 = 3.8998e-04
Loss = 4.1477e-01, PNorm = 58.7979, GNorm = 6.3030, lr_0 = 3.8971e-04
Loss = 4.2543e-01, PNorm = 58.8020, GNorm = 1.4845, lr_0 = 3.8945e-04
Loss = 3.5932e-01, PNorm = 58.8082, GNorm = 1.1724, lr_0 = 3.8918e-04
Loss = 3.9823e-01, PNorm = 58.8127, GNorm = 1.4829, lr_0 = 3.8891e-04
Loss = 4.2222e-01, PNorm = 58.8147, GNorm = 1.1592, lr_0 = 3.8865e-04
Loss = 3.3236e-01, PNorm = 58.8223, GNorm = 1.4371, lr_0 = 3.8838e-04
Loss = 4.3393e-01, PNorm = 58.8248, GNorm = 1.1693, lr_0 = 3.8811e-04
Loss = 4.3568e-01, PNorm = 58.8300, GNorm = 1.2901, lr_0 = 3.8785e-04
Loss = 3.4481e-01, PNorm = 58.8298, GNorm = 1.3833, lr_0 = 3.8758e-04
Loss = 3.9506e-01, PNorm = 58.8301, GNorm = 1.2606, lr_0 = 3.8732e-04
Loss = 3.7617e-01, PNorm = 58.8353, GNorm = 1.3302, lr_0 = 3.8705e-04
Loss = 4.3630e-01, PNorm = 58.8399, GNorm = 1.5723, lr_0 = 3.8679e-04
Loss = 4.7176e-01, PNorm = 58.8465, GNorm = 1.5421, lr_0 = 3.8652e-04
Loss = 3.7788e-01, PNorm = 58.8537, GNorm = 1.5572, lr_0 = 3.8626e-04
Loss = 3.9577e-01, PNorm = 58.8585, GNorm = 1.2555, lr_0 = 3.8599e-04
Loss = 3.4680e-01, PNorm = 58.8630, GNorm = 1.1714, lr_0 = 3.8573e-04
Loss = 3.6986e-01, PNorm = 58.8656, GNorm = 1.1996, lr_0 = 3.8546e-04
Loss = 3.6473e-01, PNorm = 58.8689, GNorm = 1.4272, lr_0 = 3.8520e-04
Loss = 4.5779e-01, PNorm = 58.8755, GNorm = 2.4013, lr_0 = 3.8493e-04
Loss = 3.9062e-01, PNorm = 58.8821, GNorm = 1.8753, lr_0 = 3.8467e-04
Loss = 4.1808e-01, PNorm = 58.8918, GNorm = 1.5389, lr_0 = 3.8441e-04
Loss = 4.0076e-01, PNorm = 58.9010, GNorm = 1.0896, lr_0 = 3.8414e-04
Loss = 4.1249e-01, PNorm = 58.9075, GNorm = 2.0597, lr_0 = 3.8388e-04
Loss = 4.5146e-01, PNorm = 58.9177, GNorm = 1.4508, lr_0 = 3.8362e-04
Loss = 4.1774e-01, PNorm = 58.9260, GNorm = 1.3036, lr_0 = 3.8336e-04
Loss = 4.1426e-01, PNorm = 58.9316, GNorm = 1.5029, lr_0 = 3.8309e-04
Loss = 3.5705e-01, PNorm = 58.9391, GNorm = 1.1135, lr_0 = 3.8283e-04
Loss = 4.5031e-01, PNorm = 58.9443, GNorm = 1.3794, lr_0 = 3.8257e-04
Loss = 4.6476e-01, PNorm = 58.9534, GNorm = 2.2166, lr_0 = 3.8231e-04
Loss = 3.5292e-01, PNorm = 58.9576, GNorm = 1.9081, lr_0 = 3.8204e-04
Loss = 3.8512e-01, PNorm = 58.9634, GNorm = 1.5022, lr_0 = 3.8178e-04
Loss = 4.0785e-01, PNorm = 58.9689, GNorm = 1.0887, lr_0 = 3.8152e-04
Loss = 3.4541e-01, PNorm = 58.9764, GNorm = 1.6767, lr_0 = 3.8126e-04
Loss = 3.8599e-01, PNorm = 58.9748, GNorm = 1.3526, lr_0 = 3.8100e-04
Loss = 3.9318e-01, PNorm = 58.9826, GNorm = 1.0088, lr_0 = 3.8074e-04
Loss = 4.0374e-01, PNorm = 58.9909, GNorm = 1.0940, lr_0 = 3.8048e-04
Loss = 3.6949e-01, PNorm = 58.9974, GNorm = 1.3479, lr_0 = 3.8022e-04
Loss = 3.8975e-01, PNorm = 58.9998, GNorm = 1.8059, lr_0 = 3.7995e-04
Loss = 3.8621e-01, PNorm = 58.9989, GNorm = 1.2204, lr_0 = 3.7969e-04
Loss = 4.3944e-01, PNorm = 59.0074, GNorm = 1.7272, lr_0 = 3.7943e-04
Loss = 5.2421e-01, PNorm = 59.0125, GNorm = 1.6261, lr_0 = 3.7917e-04
Loss = 4.2245e-01, PNorm = 59.0222, GNorm = 1.8487, lr_0 = 3.7891e-04
Loss = 3.7258e-01, PNorm = 59.0288, GNorm = 0.9401, lr_0 = 3.7866e-04
Loss = 4.2653e-01, PNorm = 59.0379, GNorm = 1.8564, lr_0 = 3.7840e-04
Loss = 3.5977e-01, PNorm = 59.0464, GNorm = 1.3562, lr_0 = 3.7814e-04
Loss = 3.8278e-01, PNorm = 59.0550, GNorm = 0.8676, lr_0 = 3.7788e-04
Loss = 3.9780e-01, PNorm = 59.0571, GNorm = 1.5012, lr_0 = 3.7762e-04
Loss = 3.5151e-01, PNorm = 59.0630, GNorm = 1.1542, lr_0 = 3.7736e-04
Loss = 3.9995e-01, PNorm = 59.0655, GNorm = 1.0755, lr_0 = 3.7710e-04
Loss = 3.8147e-01, PNorm = 59.0658, GNorm = 1.4210, lr_0 = 3.7684e-04
Loss = 3.8671e-01, PNorm = 59.0663, GNorm = 1.6835, lr_0 = 3.7659e-04
Loss = 3.6824e-01, PNorm = 59.0736, GNorm = 1.2822, lr_0 = 3.7633e-04
Loss = 3.1014e-01, PNorm = 59.0756, GNorm = 1.1028, lr_0 = 3.7607e-04
Loss = 4.2431e-01, PNorm = 59.0755, GNorm = 1.3399, lr_0 = 3.7581e-04
Loss = 4.3615e-01, PNorm = 59.0809, GNorm = 1.8398, lr_0 = 3.7555e-04
Loss = 3.9056e-01, PNorm = 59.0877, GNorm = 1.0276, lr_0 = 3.7530e-04
Loss = 4.2004e-01, PNorm = 59.0907, GNorm = 1.3577, lr_0 = 3.7504e-04
Loss = 3.8921e-01, PNorm = 59.0946, GNorm = 1.5745, lr_0 = 3.7478e-04
Loss = 4.4479e-01, PNorm = 59.0955, GNorm = 1.8624, lr_0 = 3.7453e-04
Loss = 4.2378e-01, PNorm = 59.1026, GNorm = 1.7633, lr_0 = 3.7427e-04
Loss = 4.0552e-01, PNorm = 59.1085, GNorm = 1.4993, lr_0 = 3.7401e-04
Loss = 3.1680e-01, PNorm = 59.1123, GNorm = 1.2482, lr_0 = 3.7376e-04
Loss = 3.9313e-01, PNorm = 59.1194, GNorm = 1.0241, lr_0 = 3.7350e-04
Loss = 3.7923e-01, PNorm = 59.1239, GNorm = 1.3695, lr_0 = 3.7325e-04
Loss = 3.6414e-01, PNorm = 59.1264, GNorm = 1.4332, lr_0 = 3.7299e-04
Loss = 3.4042e-01, PNorm = 59.1272, GNorm = 1.1123, lr_0 = 3.7273e-04
Validation mae = 0.113558
Epoch 14
Loss = 3.9748e-01, PNorm = 59.1291, GNorm = 1.5617, lr_0 = 3.7248e-04
Loss = 3.9577e-01, PNorm = 59.1330, GNorm = 1.4242, lr_0 = 3.7222e-04
Loss = 3.8513e-01, PNorm = 59.1436, GNorm = 1.8569, lr_0 = 3.7197e-04
Loss = 4.3179e-01, PNorm = 59.1503, GNorm = 1.5777, lr_0 = 3.7171e-04
Loss = 3.9155e-01, PNorm = 59.1587, GNorm = 1.6499, lr_0 = 3.7146e-04
Loss = 4.5124e-01, PNorm = 59.1622, GNorm = 1.8105, lr_0 = 3.7120e-04
Loss = 4.1487e-01, PNorm = 59.1716, GNorm = 1.5907, lr_0 = 3.7095e-04
Loss = 3.2929e-01, PNorm = 59.1812, GNorm = 1.1914, lr_0 = 3.7070e-04
Loss = 4.0030e-01, PNorm = 59.1857, GNorm = 1.3398, lr_0 = 3.7044e-04
Loss = 3.6661e-01, PNorm = 59.1886, GNorm = 1.5913, lr_0 = 3.7019e-04
Loss = 3.7734e-01, PNorm = 59.1918, GNorm = 1.2868, lr_0 = 3.6993e-04
Loss = 3.9405e-01, PNorm = 59.2005, GNorm = 1.8040, lr_0 = 3.6968e-04
Loss = 4.0659e-01, PNorm = 59.2013, GNorm = 1.7601, lr_0 = 3.6943e-04
Loss = 4.0870e-01, PNorm = 59.2129, GNorm = 1.2456, lr_0 = 3.6917e-04
Loss = 3.8687e-01, PNorm = 59.2178, GNorm = 1.5110, lr_0 = 3.6892e-04
Loss = 3.6488e-01, PNorm = 59.2220, GNorm = 1.8413, lr_0 = 3.6867e-04
Loss = 3.7762e-01, PNorm = 59.2243, GNorm = 1.0904, lr_0 = 3.6842e-04
Loss = 3.9237e-01, PNorm = 59.2284, GNorm = 1.1024, lr_0 = 3.6816e-04
Loss = 3.7420e-01, PNorm = 59.2342, GNorm = 0.9113, lr_0 = 3.6791e-04
Loss = 3.7714e-01, PNorm = 59.2402, GNorm = 1.2615, lr_0 = 3.6766e-04
Loss = 4.1973e-01, PNorm = 59.2443, GNorm = 1.3315, lr_0 = 3.6741e-04
Loss = 4.0302e-01, PNorm = 59.2461, GNorm = 2.0406, lr_0 = 3.6716e-04
Loss = 3.9856e-01, PNorm = 59.2552, GNorm = 1.0711, lr_0 = 3.6690e-04
Loss = 4.0265e-01, PNorm = 59.2606, GNorm = 2.1916, lr_0 = 3.6665e-04
Loss = 4.0318e-01, PNorm = 59.2694, GNorm = 1.7263, lr_0 = 3.6640e-04
Loss = 3.1792e-01, PNorm = 59.2824, GNorm = 1.4141, lr_0 = 3.6615e-04
Loss = 3.5043e-01, PNorm = 59.2868, GNorm = 1.5224, lr_0 = 3.6590e-04
Loss = 4.1361e-01, PNorm = 59.2909, GNorm = 1.8190, lr_0 = 3.6565e-04
Loss = 3.8966e-01, PNorm = 59.2930, GNorm = 1.2626, lr_0 = 3.6540e-04
Loss = 3.9603e-01, PNorm = 59.3022, GNorm = 1.4798, lr_0 = 3.6515e-04
Loss = 3.9769e-01, PNorm = 59.3068, GNorm = 1.2500, lr_0 = 3.6490e-04
Loss = 3.6587e-01, PNorm = 59.3140, GNorm = 1.1485, lr_0 = 3.6465e-04
Loss = 3.3775e-01, PNorm = 59.3235, GNorm = 1.1173, lr_0 = 3.6440e-04
Loss = 4.2143e-01, PNorm = 59.3293, GNorm = 1.5595, lr_0 = 3.6415e-04
Loss = 3.6070e-01, PNorm = 59.3358, GNorm = 1.0236, lr_0 = 3.6390e-04
Loss = 3.7519e-01, PNorm = 59.3423, GNorm = 1.4126, lr_0 = 3.6365e-04
Loss = 3.7603e-01, PNorm = 59.3471, GNorm = 1.2688, lr_0 = 3.6340e-04
Loss = 3.6615e-01, PNorm = 59.3511, GNorm = 1.5528, lr_0 = 3.6315e-04
Loss = 3.5120e-01, PNorm = 59.3555, GNorm = 1.4913, lr_0 = 3.6290e-04
Loss = 3.8640e-01, PNorm = 59.3588, GNorm = 1.6058, lr_0 = 3.6266e-04
Loss = 3.7466e-01, PNorm = 59.3649, GNorm = 1.3120, lr_0 = 3.6241e-04
Loss = 3.8270e-01, PNorm = 59.3736, GNorm = 1.5146, lr_0 = 3.6216e-04
Loss = 4.1788e-01, PNorm = 59.3791, GNorm = 1.5390, lr_0 = 3.6191e-04
Loss = 4.2225e-01, PNorm = 59.3866, GNorm = 1.4774, lr_0 = 3.6166e-04
Loss = 3.9453e-01, PNorm = 59.3958, GNorm = 1.7599, lr_0 = 3.6141e-04
Loss = 3.6668e-01, PNorm = 59.4028, GNorm = 1.4206, lr_0 = 3.6117e-04
Loss = 3.8903e-01, PNorm = 59.4051, GNorm = 1.0760, lr_0 = 3.6092e-04
Loss = 3.5632e-01, PNorm = 59.4108, GNorm = 0.9107, lr_0 = 3.6067e-04
Loss = 3.8156e-01, PNorm = 59.4129, GNorm = 1.4678, lr_0 = 3.6043e-04
Loss = 3.8178e-01, PNorm = 59.4182, GNorm = 1.3774, lr_0 = 3.6018e-04
Loss = 3.8384e-01, PNorm = 59.4198, GNorm = 1.1966, lr_0 = 3.5993e-04
Loss = 4.2948e-01, PNorm = 59.4204, GNorm = 3.0528, lr_0 = 3.5969e-04
Loss = 4.0304e-01, PNorm = 59.4251, GNorm = 1.2589, lr_0 = 3.5944e-04
Loss = 4.2032e-01, PNorm = 59.4318, GNorm = 0.9941, lr_0 = 3.5919e-04
Loss = 3.9522e-01, PNorm = 59.4370, GNorm = 1.9195, lr_0 = 3.5895e-04
Loss = 4.1115e-01, PNorm = 59.4458, GNorm = 2.0294, lr_0 = 3.5870e-04
Loss = 4.5240e-01, PNorm = 59.4497, GNorm = 1.1801, lr_0 = 3.5845e-04
Loss = 4.4035e-01, PNorm = 59.4539, GNorm = 1.8036, lr_0 = 3.5821e-04
Loss = 4.4955e-01, PNorm = 59.4572, GNorm = 1.7746, lr_0 = 3.5796e-04
Loss = 3.6894e-01, PNorm = 59.4649, GNorm = 1.0107, lr_0 = 3.5772e-04
Loss = 4.1177e-01, PNorm = 59.4701, GNorm = 2.3633, lr_0 = 3.5747e-04
Loss = 3.9380e-01, PNorm = 59.4800, GNorm = 1.7037, lr_0 = 3.5723e-04
Loss = 3.4152e-01, PNorm = 59.4841, GNorm = 1.3414, lr_0 = 3.5698e-04
Loss = 4.2017e-01, PNorm = 59.4878, GNorm = 1.8524, lr_0 = 3.5674e-04
Loss = 3.5494e-01, PNorm = 59.4947, GNorm = 1.3551, lr_0 = 3.5650e-04
Loss = 3.4285e-01, PNorm = 59.4988, GNorm = 1.6137, lr_0 = 3.5625e-04
Loss = 3.8349e-01, PNorm = 59.5036, GNorm = 1.1910, lr_0 = 3.5601e-04
Loss = 3.2119e-01, PNorm = 59.5086, GNorm = 1.4025, lr_0 = 3.5576e-04
Loss = 4.7635e-01, PNorm = 59.5088, GNorm = 1.9136, lr_0 = 3.5552e-04
Loss = 3.8148e-01, PNorm = 59.5112, GNorm = 1.3129, lr_0 = 3.5528e-04
Loss = 4.2251e-01, PNorm = 59.5125, GNorm = 1.4324, lr_0 = 3.5503e-04
Loss = 4.0528e-01, PNorm = 59.5190, GNorm = 1.4115, lr_0 = 3.5479e-04
Loss = 4.0545e-01, PNorm = 59.5245, GNorm = 1.6324, lr_0 = 3.5455e-04
Loss = 4.2025e-01, PNorm = 59.5306, GNorm = 1.1417, lr_0 = 3.5430e-04
Loss = 4.5223e-01, PNorm = 59.5370, GNorm = 1.3477, lr_0 = 3.5406e-04
Loss = 3.4413e-01, PNorm = 59.5453, GNorm = 1.0493, lr_0 = 3.5382e-04
Loss = 4.0055e-01, PNorm = 59.5464, GNorm = 1.3582, lr_0 = 3.5358e-04
Loss = 3.8928e-01, PNorm = 59.5505, GNorm = 1.9045, lr_0 = 3.5333e-04
Loss = 3.9154e-01, PNorm = 59.5577, GNorm = 2.8757, lr_0 = 3.5309e-04
Loss = 3.9966e-01, PNorm = 59.5608, GNorm = 1.8439, lr_0 = 3.5285e-04
Loss = 4.6642e-01, PNorm = 59.5665, GNorm = 1.5127, lr_0 = 3.5261e-04
Loss = 3.9967e-01, PNorm = 59.5705, GNorm = 1.8631, lr_0 = 3.5237e-04
Loss = 3.9927e-01, PNorm = 59.5733, GNorm = 1.0662, lr_0 = 3.5212e-04
Loss = 4.1805e-01, PNorm = 59.5754, GNorm = 1.2929, lr_0 = 3.5188e-04
Loss = 3.3802e-01, PNorm = 59.5794, GNorm = 1.6966, lr_0 = 3.5164e-04
Loss = 4.7186e-01, PNorm = 59.5825, GNorm = 1.1799, lr_0 = 3.5140e-04
Loss = 3.6975e-01, PNorm = 59.5894, GNorm = 1.4261, lr_0 = 3.5116e-04
Loss = 4.0162e-01, PNorm = 59.5951, GNorm = 2.1876, lr_0 = 3.5092e-04
Loss = 3.5890e-01, PNorm = 59.5986, GNorm = 1.6181, lr_0 = 3.5068e-04
Loss = 3.9277e-01, PNorm = 59.6031, GNorm = 1.3819, lr_0 = 3.5044e-04
Loss = 3.2870e-01, PNorm = 59.6054, GNorm = 1.4467, lr_0 = 3.5020e-04
Loss = 3.8765e-01, PNorm = 59.6127, GNorm = 1.6048, lr_0 = 3.4996e-04
Loss = 3.7086e-01, PNorm = 59.6175, GNorm = 1.1561, lr_0 = 3.4972e-04
Loss = 3.8096e-01, PNorm = 59.6191, GNorm = 1.4089, lr_0 = 3.4948e-04
Loss = 4.2974e-01, PNorm = 59.6243, GNorm = 1.8749, lr_0 = 3.4924e-04
Loss = 4.2729e-01, PNorm = 59.6298, GNorm = 1.8797, lr_0 = 3.4900e-04
Loss = 3.4376e-01, PNorm = 59.6296, GNorm = 1.3697, lr_0 = 3.4876e-04
Loss = 3.9040e-01, PNorm = 59.6341, GNorm = 1.3920, lr_0 = 3.4852e-04
Loss = 3.5678e-01, PNorm = 59.6390, GNorm = 1.4036, lr_0 = 3.4828e-04
Loss = 4.6068e-01, PNorm = 59.6466, GNorm = 1.6628, lr_0 = 3.4805e-04
Loss = 3.6872e-01, PNorm = 59.6537, GNorm = 1.4004, lr_0 = 3.4781e-04
Loss = 3.4921e-01, PNorm = 59.6584, GNorm = 1.2599, lr_0 = 3.4757e-04
Loss = 3.7313e-01, PNorm = 59.6601, GNorm = 1.2090, lr_0 = 3.4733e-04
Loss = 3.6826e-01, PNorm = 59.6592, GNorm = 1.0415, lr_0 = 3.4709e-04
Loss = 3.5374e-01, PNorm = 59.6666, GNorm = 1.2415, lr_0 = 3.4686e-04
Loss = 3.6836e-01, PNorm = 59.6707, GNorm = 1.2334, lr_0 = 3.4662e-04
Loss = 3.2989e-01, PNorm = 59.6750, GNorm = 1.2144, lr_0 = 3.4638e-04
Loss = 4.0335e-01, PNorm = 59.6749, GNorm = 1.6939, lr_0 = 3.4614e-04
Loss = 3.8576e-01, PNorm = 59.6808, GNorm = 1.2229, lr_0 = 3.4591e-04
Loss = 3.2213e-01, PNorm = 59.6860, GNorm = 1.2754, lr_0 = 3.4567e-04
Loss = 3.7977e-01, PNorm = 59.6911, GNorm = 1.3992, lr_0 = 3.4543e-04
Loss = 3.8243e-01, PNorm = 59.6907, GNorm = 1.6873, lr_0 = 3.4520e-04
Loss = 3.7980e-01, PNorm = 59.6962, GNorm = 1.2285, lr_0 = 3.4496e-04
Loss = 4.3277e-01, PNorm = 59.7004, GNorm = 1.6400, lr_0 = 3.4472e-04
Loss = 3.9819e-01, PNorm = 59.7098, GNorm = 1.9636, lr_0 = 3.4449e-04
Loss = 3.8769e-01, PNorm = 59.7134, GNorm = 1.5834, lr_0 = 3.4425e-04
Loss = 4.0504e-01, PNorm = 59.7168, GNorm = 1.1920, lr_0 = 3.4402e-04
Loss = 3.7159e-01, PNorm = 59.7187, GNorm = 1.5357, lr_0 = 3.4378e-04
Loss = 4.4572e-01, PNorm = 59.7178, GNorm = 1.6018, lr_0 = 3.4354e-04
Loss = 4.2552e-01, PNorm = 59.7188, GNorm = 1.4163, lr_0 = 3.4331e-04
Validation mae = 0.113032
Epoch 15
Loss = 3.8499e-01, PNorm = 59.7214, GNorm = 1.3694, lr_0 = 3.4307e-04
Loss = 3.7477e-01, PNorm = 59.7256, GNorm = 0.9884, lr_0 = 3.4284e-04
Loss = 3.6965e-01, PNorm = 59.7301, GNorm = 1.2658, lr_0 = 3.4260e-04
Loss = 3.6027e-01, PNorm = 59.7349, GNorm = 1.3702, lr_0 = 3.4237e-04
Loss = 4.0273e-01, PNorm = 59.7392, GNorm = 1.2894, lr_0 = 3.4213e-04
Loss = 3.8416e-01, PNorm = 59.7433, GNorm = 1.3240, lr_0 = 3.4190e-04
Loss = 3.9391e-01, PNorm = 59.7479, GNorm = 1.1133, lr_0 = 3.4167e-04
Loss = 3.6394e-01, PNorm = 59.7546, GNorm = 1.3229, lr_0 = 3.4143e-04
Loss = 3.8141e-01, PNorm = 59.7588, GNorm = 2.1539, lr_0 = 3.4120e-04
Loss = 3.7626e-01, PNorm = 59.7606, GNorm = 2.0226, lr_0 = 3.4096e-04
Loss = 3.6622e-01, PNorm = 59.7641, GNorm = 1.6893, lr_0 = 3.4073e-04
Loss = 4.1890e-01, PNorm = 59.7664, GNorm = 1.3337, lr_0 = 3.4050e-04
Loss = 4.2329e-01, PNorm = 59.7705, GNorm = 1.3032, lr_0 = 3.4026e-04
Loss = 3.7548e-01, PNorm = 59.7771, GNorm = 1.0444, lr_0 = 3.4003e-04
Loss = 3.5167e-01, PNorm = 59.7808, GNorm = 1.5095, lr_0 = 3.3980e-04
Loss = 3.5138e-01, PNorm = 59.7842, GNorm = 2.0033, lr_0 = 3.3956e-04
Loss = 4.1606e-01, PNorm = 59.7873, GNorm = 2.1984, lr_0 = 3.3933e-04
Loss = 3.5797e-01, PNorm = 59.7976, GNorm = 1.4665, lr_0 = 3.3910e-04
Loss = 4.0816e-01, PNorm = 59.7992, GNorm = 1.2118, lr_0 = 3.3887e-04
Loss = 3.7076e-01, PNorm = 59.8111, GNorm = 1.2653, lr_0 = 3.3864e-04
Loss = 3.4647e-01, PNorm = 59.8161, GNorm = 1.2503, lr_0 = 3.3840e-04
Loss = 3.5727e-01, PNorm = 59.8226, GNorm = 1.8618, lr_0 = 3.3817e-04
Loss = 4.0008e-01, PNorm = 59.8233, GNorm = 1.4651, lr_0 = 3.3794e-04
Loss = 4.1151e-01, PNorm = 59.8265, GNorm = 1.1802, lr_0 = 3.3771e-04
Loss = 3.9980e-01, PNorm = 59.8320, GNorm = 2.4052, lr_0 = 3.3748e-04
Loss = 4.2475e-01, PNorm = 59.8371, GNorm = 3.4067, lr_0 = 3.3725e-04
Loss = 3.8492e-01, PNorm = 59.8360, GNorm = 1.7056, lr_0 = 3.3701e-04
Loss = 4.0479e-01, PNorm = 59.8413, GNorm = 1.1615, lr_0 = 3.3678e-04
Loss = 4.0802e-01, PNorm = 59.8483, GNorm = 1.7240, lr_0 = 3.3655e-04
Loss = 3.7614e-01, PNorm = 59.8547, GNorm = 1.4995, lr_0 = 3.3632e-04
Loss = 3.8641e-01, PNorm = 59.8556, GNorm = 1.1748, lr_0 = 3.3609e-04
Loss = 4.0348e-01, PNorm = 59.8627, GNorm = 1.8997, lr_0 = 3.3586e-04
Loss = 4.1029e-01, PNorm = 59.8699, GNorm = 1.1242, lr_0 = 3.3563e-04
Loss = 4.4108e-01, PNorm = 59.8753, GNorm = 1.7308, lr_0 = 3.3540e-04
Loss = 3.4961e-01, PNorm = 59.8797, GNorm = 1.2753, lr_0 = 3.3517e-04
Loss = 4.2136e-01, PNorm = 59.8834, GNorm = 1.2391, lr_0 = 3.3494e-04
Loss = 4.0635e-01, PNorm = 59.8898, GNorm = 1.2335, lr_0 = 3.3471e-04
Loss = 4.0914e-01, PNorm = 59.8891, GNorm = 1.2248, lr_0 = 3.3448e-04
Loss = 3.6953e-01, PNorm = 59.8945, GNorm = 1.2815, lr_0 = 3.3425e-04
Loss = 4.4895e-01, PNorm = 59.8983, GNorm = 1.2348, lr_0 = 3.3403e-04
Loss = 3.7841e-01, PNorm = 59.9059, GNorm = 1.0690, lr_0 = 3.3380e-04
Loss = 3.8509e-01, PNorm = 59.9043, GNorm = 1.2740, lr_0 = 3.3357e-04
Loss = 4.0636e-01, PNorm = 59.9095, GNorm = 1.2177, lr_0 = 3.3334e-04
Loss = 3.6799e-01, PNorm = 59.9147, GNorm = 1.6363, lr_0 = 3.3311e-04
Loss = 3.4432e-01, PNorm = 59.9195, GNorm = 1.8301, lr_0 = 3.3288e-04
Loss = 3.4247e-01, PNorm = 59.9247, GNorm = 1.9746, lr_0 = 3.3265e-04
Loss = 4.3246e-01, PNorm = 59.9309, GNorm = 1.0258, lr_0 = 3.3243e-04
Loss = 4.0698e-01, PNorm = 59.9335, GNorm = 2.0720, lr_0 = 3.3220e-04
Loss = 3.9145e-01, PNorm = 59.9395, GNorm = 1.5338, lr_0 = 3.3197e-04
Loss = 4.1227e-01, PNorm = 59.9428, GNorm = 1.2816, lr_0 = 3.3174e-04
Loss = 4.0122e-01, PNorm = 59.9461, GNorm = 2.2726, lr_0 = 3.3152e-04
Loss = 3.7355e-01, PNorm = 59.9514, GNorm = 1.6649, lr_0 = 3.3129e-04
Loss = 3.7053e-01, PNorm = 59.9544, GNorm = 1.5972, lr_0 = 3.3106e-04
Loss = 3.6054e-01, PNorm = 59.9635, GNorm = 1.4941, lr_0 = 3.3084e-04
Loss = 3.7706e-01, PNorm = 59.9682, GNorm = 1.7656, lr_0 = 3.3061e-04
Loss = 3.9329e-01, PNorm = 59.9730, GNorm = 1.2167, lr_0 = 3.3038e-04
Loss = 3.5378e-01, PNorm = 59.9780, GNorm = 1.4433, lr_0 = 3.3016e-04
Loss = 3.7918e-01, PNorm = 59.9831, GNorm = 1.2179, lr_0 = 3.2993e-04
Loss = 3.4421e-01, PNorm = 59.9865, GNorm = 1.1032, lr_0 = 3.2970e-04
Loss = 3.8800e-01, PNorm = 59.9898, GNorm = 1.2859, lr_0 = 3.2948e-04
Loss = 4.0359e-01, PNorm = 59.9924, GNorm = 1.1314, lr_0 = 3.2925e-04
Loss = 4.0775e-01, PNorm = 60.0026, GNorm = 0.9830, lr_0 = 3.2903e-04
Loss = 3.5815e-01, PNorm = 60.0076, GNorm = 1.2681, lr_0 = 3.2880e-04
Loss = 3.7639e-01, PNorm = 60.0104, GNorm = 1.6935, lr_0 = 3.2858e-04
Loss = 3.7366e-01, PNorm = 60.0160, GNorm = 1.9681, lr_0 = 3.2835e-04
Loss = 3.7687e-01, PNorm = 60.0194, GNorm = 1.9860, lr_0 = 3.2813e-04
Loss = 3.9674e-01, PNorm = 60.0232, GNorm = 1.7005, lr_0 = 3.2790e-04
Loss = 3.5180e-01, PNorm = 60.0272, GNorm = 0.8644, lr_0 = 3.2768e-04
Loss = 3.9917e-01, PNorm = 60.0342, GNorm = 1.9849, lr_0 = 3.2745e-04
Loss = 3.8275e-01, PNorm = 60.0408, GNorm = 1.1946, lr_0 = 3.2723e-04
Loss = 3.8726e-01, PNorm = 60.0474, GNorm = 1.0635, lr_0 = 3.2700e-04
Loss = 3.9024e-01, PNorm = 60.0495, GNorm = 1.1716, lr_0 = 3.2678e-04
Loss = 3.5204e-01, PNorm = 60.0534, GNorm = 1.8664, lr_0 = 3.2656e-04
Loss = 3.8940e-01, PNorm = 60.0540, GNorm = 1.8501, lr_0 = 3.2633e-04
Loss = 4.1000e-01, PNorm = 60.0577, GNorm = 2.0827, lr_0 = 3.2611e-04
Loss = 3.9862e-01, PNorm = 60.0630, GNorm = 1.8318, lr_0 = 3.2589e-04
Loss = 4.0102e-01, PNorm = 60.0650, GNorm = 1.2299, lr_0 = 3.2566e-04
Loss = 4.1154e-01, PNorm = 60.0683, GNorm = 1.8445, lr_0 = 3.2544e-04
Loss = 3.7357e-01, PNorm = 60.0728, GNorm = 1.1435, lr_0 = 3.2522e-04
Loss = 3.5565e-01, PNorm = 60.0788, GNorm = 1.4514, lr_0 = 3.2499e-04
Loss = 3.8199e-01, PNorm = 60.0843, GNorm = 2.0001, lr_0 = 3.2477e-04
Loss = 3.7770e-01, PNorm = 60.0903, GNorm = 1.5913, lr_0 = 3.2455e-04
Loss = 3.9496e-01, PNorm = 60.0934, GNorm = 1.7032, lr_0 = 3.2433e-04
Loss = 3.7062e-01, PNorm = 60.0975, GNorm = 1.3372, lr_0 = 3.2410e-04
Loss = 3.8005e-01, PNorm = 60.1013, GNorm = 1.5440, lr_0 = 3.2388e-04
Loss = 4.1911e-01, PNorm = 60.1118, GNorm = 1.4261, lr_0 = 3.2366e-04
Loss = 3.6726e-01, PNorm = 60.1144, GNorm = 1.0457, lr_0 = 3.2344e-04
Loss = 3.7549e-01, PNorm = 60.1228, GNorm = 1.2893, lr_0 = 3.2322e-04
Loss = 3.5697e-01, PNorm = 60.1271, GNorm = 1.4407, lr_0 = 3.2300e-04
Loss = 3.6069e-01, PNorm = 60.1325, GNorm = 1.5261, lr_0 = 3.2277e-04
Loss = 4.2914e-01, PNorm = 60.1420, GNorm = 1.3845, lr_0 = 3.2255e-04
Loss = 3.4972e-01, PNorm = 60.1463, GNorm = 1.8113, lr_0 = 3.2233e-04
Loss = 3.9691e-01, PNorm = 60.1491, GNorm = 1.1738, lr_0 = 3.2211e-04
Loss = 4.3921e-01, PNorm = 60.1557, GNorm = 1.4291, lr_0 = 3.2189e-04
Loss = 3.7983e-01, PNorm = 60.1600, GNorm = 1.3496, lr_0 = 3.2167e-04
Loss = 4.3844e-01, PNorm = 60.1643, GNorm = 1.4740, lr_0 = 3.2145e-04
Loss = 4.1670e-01, PNorm = 60.1681, GNorm = 1.3058, lr_0 = 3.2123e-04
Loss = 3.8274e-01, PNorm = 60.1677, GNorm = 1.3572, lr_0 = 3.2101e-04
Loss = 3.8367e-01, PNorm = 60.1724, GNorm = 1.5785, lr_0 = 3.2079e-04
Loss = 3.2859e-01, PNorm = 60.1776, GNorm = 1.0331, lr_0 = 3.2057e-04
Loss = 3.7870e-01, PNorm = 60.1783, GNorm = 1.3587, lr_0 = 3.2035e-04
Loss = 4.0709e-01, PNorm = 60.1824, GNorm = 1.3355, lr_0 = 3.2013e-04
Loss = 3.5328e-01, PNorm = 60.1857, GNorm = 1.5253, lr_0 = 3.1991e-04
Loss = 3.7754e-01, PNorm = 60.1907, GNorm = 1.1743, lr_0 = 3.1969e-04
Loss = 3.4606e-01, PNorm = 60.1938, GNorm = 1.2084, lr_0 = 3.1947e-04
Loss = 4.2688e-01, PNorm = 60.1972, GNorm = 2.7716, lr_0 = 3.1925e-04
Loss = 3.4032e-01, PNorm = 60.2045, GNorm = 1.6851, lr_0 = 3.1904e-04
Loss = 3.7906e-01, PNorm = 60.2034, GNorm = 1.8822, lr_0 = 3.1882e-04
Loss = 4.2121e-01, PNorm = 60.2091, GNorm = 1.3396, lr_0 = 3.1860e-04
Loss = 3.8167e-01, PNorm = 60.2138, GNorm = 1.0528, lr_0 = 3.1838e-04
Loss = 4.1642e-01, PNorm = 60.2162, GNorm = 1.3617, lr_0 = 3.1816e-04
Loss = 4.1557e-01, PNorm = 60.2222, GNorm = 1.9556, lr_0 = 3.1794e-04
Loss = 3.7537e-01, PNorm = 60.2229, GNorm = 1.5067, lr_0 = 3.1773e-04
Loss = 3.4878e-01, PNorm = 60.2296, GNorm = 1.6232, lr_0 = 3.1751e-04
Loss = 4.1094e-01, PNorm = 60.2342, GNorm = 1.9864, lr_0 = 3.1729e-04
Loss = 3.5742e-01, PNorm = 60.2382, GNorm = 0.9547, lr_0 = 3.1707e-04
Loss = 3.7717e-01, PNorm = 60.2439, GNorm = 0.9372, lr_0 = 3.1686e-04
Loss = 3.6906e-01, PNorm = 60.2470, GNorm = 1.9529, lr_0 = 3.1664e-04
Loss = 3.5867e-01, PNorm = 60.2520, GNorm = 1.8651, lr_0 = 3.1642e-04
Loss = 3.8621e-01, PNorm = 60.2533, GNorm = 1.5244, lr_0 = 3.1621e-04
Validation mae = 0.113467
Epoch 16
Loss = 3.5845e-01, PNorm = 60.2633, GNorm = 1.2902, lr_0 = 3.1599e-04
Loss = 4.5749e-01, PNorm = 60.2665, GNorm = 1.3130, lr_0 = 3.1577e-04
Loss = 3.3611e-01, PNorm = 60.2722, GNorm = 1.3752, lr_0 = 3.1556e-04
Loss = 3.8111e-01, PNorm = 60.2768, GNorm = 1.5929, lr_0 = 3.1534e-04
Loss = 3.4468e-01, PNorm = 60.2794, GNorm = 1.4752, lr_0 = 3.1512e-04
Loss = 3.5033e-01, PNorm = 60.2826, GNorm = 0.9276, lr_0 = 3.1491e-04
Loss = 3.7829e-01, PNorm = 60.2879, GNorm = 1.2181, lr_0 = 3.1469e-04
Loss = 3.8017e-01, PNorm = 60.2937, GNorm = 1.8015, lr_0 = 3.1448e-04
Loss = 4.6475e-01, PNorm = 60.3029, GNorm = 1.7339, lr_0 = 3.1426e-04
Loss = 3.4465e-01, PNorm = 60.3063, GNorm = 1.2140, lr_0 = 3.1405e-04
Loss = 3.5796e-01, PNorm = 60.3093, GNorm = 1.4229, lr_0 = 3.1383e-04
Loss = 4.2738e-01, PNorm = 60.3097, GNorm = 1.3469, lr_0 = 3.1362e-04
Loss = 4.0705e-01, PNorm = 60.3169, GNorm = 1.6330, lr_0 = 3.1340e-04
Loss = 3.5806e-01, PNorm = 60.3197, GNorm = 1.6289, lr_0 = 3.1319e-04
Loss = 3.7491e-01, PNorm = 60.3221, GNorm = 1.1720, lr_0 = 3.1297e-04
Loss = 3.6090e-01, PNorm = 60.3261, GNorm = 2.0219, lr_0 = 3.1276e-04
Loss = 3.5519e-01, PNorm = 60.3318, GNorm = 0.8875, lr_0 = 3.1254e-04
Loss = 3.2242e-01, PNorm = 60.3371, GNorm = 1.0227, lr_0 = 3.1233e-04
Loss = 3.6843e-01, PNorm = 60.3382, GNorm = 1.5603, lr_0 = 3.1212e-04
Loss = 3.6726e-01, PNorm = 60.3409, GNorm = 1.3442, lr_0 = 3.1190e-04
Loss = 3.1955e-01, PNorm = 60.3429, GNorm = 1.5708, lr_0 = 3.1169e-04
Loss = 4.0680e-01, PNorm = 60.3472, GNorm = 1.5666, lr_0 = 3.1147e-04
Loss = 3.6764e-01, PNorm = 60.3542, GNorm = 1.1210, lr_0 = 3.1126e-04
Loss = 4.0124e-01, PNorm = 60.3577, GNorm = 1.0198, lr_0 = 3.1105e-04
Loss = 3.6461e-01, PNorm = 60.3636, GNorm = 1.9781, lr_0 = 3.1083e-04
Loss = 3.8843e-01, PNorm = 60.3679, GNorm = 1.1759, lr_0 = 3.1062e-04
Loss = 3.5329e-01, PNorm = 60.3696, GNorm = 1.0100, lr_0 = 3.1041e-04
Loss = 3.3711e-01, PNorm = 60.3706, GNorm = 0.9158, lr_0 = 3.1020e-04
Loss = 3.6112e-01, PNorm = 60.3746, GNorm = 1.3213, lr_0 = 3.0998e-04
Loss = 3.3851e-01, PNorm = 60.3748, GNorm = 2.2137, lr_0 = 3.0977e-04
Loss = 4.1843e-01, PNorm = 60.3786, GNorm = 1.5006, lr_0 = 3.0956e-04
Loss = 4.0069e-01, PNorm = 60.3823, GNorm = 1.2267, lr_0 = 3.0935e-04
Loss = 3.4770e-01, PNorm = 60.3875, GNorm = 1.8025, lr_0 = 3.0914e-04
Loss = 3.5599e-01, PNorm = 60.3925, GNorm = 1.2088, lr_0 = 3.0892e-04
Loss = 4.0264e-01, PNorm = 60.3997, GNorm = 0.9852, lr_0 = 3.0871e-04
Loss = 3.5941e-01, PNorm = 60.4068, GNorm = 2.1280, lr_0 = 3.0850e-04
Loss = 3.9863e-01, PNorm = 60.4088, GNorm = 1.3885, lr_0 = 3.0829e-04
Loss = 4.1543e-01, PNorm = 60.4122, GNorm = 1.1480, lr_0 = 3.0808e-04
Loss = 4.0957e-01, PNorm = 60.4210, GNorm = 1.5004, lr_0 = 3.0787e-04
Loss = 4.4303e-01, PNorm = 60.4229, GNorm = 1.5784, lr_0 = 3.0766e-04
Loss = 4.2778e-01, PNorm = 60.4316, GNorm = 1.2645, lr_0 = 3.0745e-04
Loss = 3.8446e-01, PNorm = 60.4298, GNorm = 1.5527, lr_0 = 3.0723e-04
Loss = 3.6729e-01, PNorm = 60.4306, GNorm = 1.2886, lr_0 = 3.0702e-04
Loss = 3.7115e-01, PNorm = 60.4362, GNorm = 1.7282, lr_0 = 3.0681e-04
Loss = 4.0772e-01, PNorm = 60.4390, GNorm = 1.2088, lr_0 = 3.0660e-04
Loss = 3.9604e-01, PNorm = 60.4468, GNorm = 1.3728, lr_0 = 3.0639e-04
Loss = 3.7397e-01, PNorm = 60.4495, GNorm = 1.3070, lr_0 = 3.0618e-04
Loss = 3.7314e-01, PNorm = 60.4549, GNorm = 1.6273, lr_0 = 3.0597e-04
Loss = 3.5915e-01, PNorm = 60.4577, GNorm = 1.2904, lr_0 = 3.0576e-04
Loss = 3.6813e-01, PNorm = 60.4615, GNorm = 1.7603, lr_0 = 3.0555e-04
Loss = 3.7641e-01, PNorm = 60.4681, GNorm = 1.5216, lr_0 = 3.0535e-04
Loss = 3.2837e-01, PNorm = 60.4749, GNorm = 2.1290, lr_0 = 3.0514e-04
Loss = 3.7279e-01, PNorm = 60.4751, GNorm = 0.9832, lr_0 = 3.0493e-04
Loss = 4.1978e-01, PNorm = 60.4792, GNorm = 1.4122, lr_0 = 3.0472e-04
Loss = 3.4359e-01, PNorm = 60.4834, GNorm = 1.0961, lr_0 = 3.0451e-04
Loss = 4.0856e-01, PNorm = 60.4896, GNorm = 2.0069, lr_0 = 3.0430e-04
Loss = 3.5711e-01, PNorm = 60.4953, GNorm = 1.1579, lr_0 = 3.0409e-04
Loss = 4.0823e-01, PNorm = 60.5009, GNorm = 1.1143, lr_0 = 3.0388e-04
Loss = 3.5252e-01, PNorm = 60.5076, GNorm = 1.4340, lr_0 = 3.0368e-04
Loss = 4.1844e-01, PNorm = 60.5154, GNorm = 1.7334, lr_0 = 3.0347e-04
Loss = 3.6698e-01, PNorm = 60.5211, GNorm = 1.2537, lr_0 = 3.0326e-04
Loss = 4.2777e-01, PNorm = 60.5268, GNorm = 2.1103, lr_0 = 3.0305e-04
Loss = 3.4243e-01, PNorm = 60.5348, GNorm = 0.9027, lr_0 = 3.0284e-04
Loss = 3.4019e-01, PNorm = 60.5390, GNorm = 1.4159, lr_0 = 3.0264e-04
Loss = 3.2897e-01, PNorm = 60.5405, GNorm = 0.9328, lr_0 = 3.0243e-04
Loss = 3.8077e-01, PNorm = 60.5417, GNorm = 1.3353, lr_0 = 3.0222e-04
Loss = 3.7689e-01, PNorm = 60.5444, GNorm = 1.4050, lr_0 = 3.0202e-04
Loss = 4.2920e-01, PNorm = 60.5481, GNorm = 1.1336, lr_0 = 3.0181e-04
Loss = 3.4209e-01, PNorm = 60.5510, GNorm = 0.9600, lr_0 = 3.0160e-04
Loss = 3.9136e-01, PNorm = 60.5545, GNorm = 1.3938, lr_0 = 3.0140e-04
Loss = 3.8054e-01, PNorm = 60.5627, GNorm = 1.1867, lr_0 = 3.0119e-04
Loss = 4.1328e-01, PNorm = 60.5686, GNorm = 1.5669, lr_0 = 3.0098e-04
Loss = 4.1461e-01, PNorm = 60.5732, GNorm = 1.5071, lr_0 = 3.0078e-04
Loss = 4.1560e-01, PNorm = 60.5753, GNorm = 1.5492, lr_0 = 3.0057e-04
Loss = 3.9070e-01, PNorm = 60.5786, GNorm = 2.9165, lr_0 = 3.0036e-04
Loss = 3.4364e-01, PNorm = 60.5818, GNorm = 1.4258, lr_0 = 3.0016e-04
Loss = 3.4684e-01, PNorm = 60.5843, GNorm = 1.2442, lr_0 = 2.9995e-04
Loss = 3.2895e-01, PNorm = 60.5843, GNorm = 1.1328, lr_0 = 2.9975e-04
Loss = 3.7679e-01, PNorm = 60.5913, GNorm = 2.0386, lr_0 = 2.9954e-04
Loss = 3.4077e-01, PNorm = 60.5928, GNorm = 1.4043, lr_0 = 2.9934e-04
Loss = 3.4581e-01, PNorm = 60.5962, GNorm = 1.2931, lr_0 = 2.9913e-04
Loss = 3.4629e-01, PNorm = 60.6011, GNorm = 1.2317, lr_0 = 2.9893e-04
Loss = 3.4756e-01, PNorm = 60.6031, GNorm = 0.8965, lr_0 = 2.9872e-04
Loss = 4.3032e-01, PNorm = 60.6079, GNorm = 1.3685, lr_0 = 2.9852e-04
Loss = 3.9531e-01, PNorm = 60.6110, GNorm = 1.8426, lr_0 = 2.9831e-04
Loss = 3.5842e-01, PNorm = 60.6137, GNorm = 1.2661, lr_0 = 2.9811e-04
Loss = 3.6585e-01, PNorm = 60.6172, GNorm = 1.5055, lr_0 = 2.9790e-04
Loss = 3.9417e-01, PNorm = 60.6192, GNorm = 1.1965, lr_0 = 2.9770e-04
Loss = 3.9776e-01, PNorm = 60.6230, GNorm = 1.5193, lr_0 = 2.9750e-04
Loss = 3.4359e-01, PNorm = 60.6283, GNorm = 1.0921, lr_0 = 2.9729e-04
Loss = 3.6922e-01, PNorm = 60.6296, GNorm = 1.1958, lr_0 = 2.9709e-04
Loss = 3.8692e-01, PNorm = 60.6316, GNorm = 1.1178, lr_0 = 2.9689e-04
Loss = 4.1450e-01, PNorm = 60.6394, GNorm = 1.2332, lr_0 = 2.9668e-04
Loss = 4.2937e-01, PNorm = 60.6405, GNorm = 2.6619, lr_0 = 2.9648e-04
Loss = 4.2317e-01, PNorm = 60.6453, GNorm = 1.4974, lr_0 = 2.9628e-04
Loss = 4.1847e-01, PNorm = 60.6514, GNorm = 1.4008, lr_0 = 2.9607e-04
Loss = 3.3832e-01, PNorm = 60.6555, GNorm = 1.6839, lr_0 = 2.9587e-04
Loss = 3.8458e-01, PNorm = 60.6568, GNorm = 1.4480, lr_0 = 2.9567e-04
Loss = 3.2985e-01, PNorm = 60.6622, GNorm = 1.5462, lr_0 = 2.9546e-04
Loss = 3.7514e-01, PNorm = 60.6678, GNorm = 1.4828, lr_0 = 2.9526e-04
Loss = 4.1216e-01, PNorm = 60.6684, GNorm = 1.7551, lr_0 = 2.9506e-04
Loss = 3.5124e-01, PNorm = 60.6738, GNorm = 1.0370, lr_0 = 2.9486e-04
Loss = 3.9941e-01, PNorm = 60.6773, GNorm = 1.3513, lr_0 = 2.9466e-04
Loss = 3.6906e-01, PNorm = 60.6817, GNorm = 1.0909, lr_0 = 2.9445e-04
Loss = 4.1059e-01, PNorm = 60.6874, GNorm = 1.5733, lr_0 = 2.9425e-04
Loss = 3.6623e-01, PNorm = 60.6916, GNorm = 1.3546, lr_0 = 2.9405e-04
Loss = 3.9284e-01, PNorm = 60.6915, GNorm = 2.5003, lr_0 = 2.9385e-04
Loss = 4.8647e-01, PNorm = 60.6950, GNorm = 2.0580, lr_0 = 2.9365e-04
Loss = 3.7410e-01, PNorm = 60.6984, GNorm = 1.0779, lr_0 = 2.9345e-04
Loss = 3.3978e-01, PNorm = 60.6988, GNorm = 1.3655, lr_0 = 2.9325e-04
Loss = 4.3203e-01, PNorm = 60.7000, GNorm = 1.0187, lr_0 = 2.9305e-04
Loss = 3.8825e-01, PNorm = 60.7054, GNorm = 1.2785, lr_0 = 2.9284e-04
Loss = 3.3430e-01, PNorm = 60.7095, GNorm = 1.0710, lr_0 = 2.9264e-04
Loss = 3.7125e-01, PNorm = 60.7128, GNorm = 1.2162, lr_0 = 2.9244e-04
Loss = 3.6938e-01, PNorm = 60.7156, GNorm = 1.1534, lr_0 = 2.9224e-04
Loss = 3.9308e-01, PNorm = 60.7189, GNorm = 1.3963, lr_0 = 2.9204e-04
Loss = 4.2710e-01, PNorm = 60.7237, GNorm = 2.1002, lr_0 = 2.9184e-04
Loss = 3.6109e-01, PNorm = 60.7289, GNorm = 1.5543, lr_0 = 2.9164e-04
Loss = 3.4974e-01, PNorm = 60.7316, GNorm = 1.0408, lr_0 = 2.9144e-04
Loss = 4.2623e-01, PNorm = 60.7314, GNorm = 1.1180, lr_0 = 2.9124e-04
Validation mae = 0.112504
Epoch 17
Loss = 3.5884e-01, PNorm = 60.7344, GNorm = 1.4986, lr_0 = 2.9104e-04
Loss = 3.5539e-01, PNorm = 60.7370, GNorm = 1.4121, lr_0 = 2.9084e-04
Loss = 3.9991e-01, PNorm = 60.7406, GNorm = 1.1997, lr_0 = 2.9065e-04
Loss = 4.0731e-01, PNorm = 60.7448, GNorm = 1.3681, lr_0 = 2.9045e-04
Loss = 3.6432e-01, PNorm = 60.7539, GNorm = 1.0734, lr_0 = 2.9025e-04
Loss = 3.5469e-01, PNorm = 60.7602, GNorm = 0.9979, lr_0 = 2.9005e-04
Loss = 4.2509e-01, PNorm = 60.7628, GNorm = 1.3728, lr_0 = 2.8985e-04
Loss = 3.7941e-01, PNorm = 60.7672, GNorm = 1.3313, lr_0 = 2.8965e-04
Loss = 3.8748e-01, PNorm = 60.7676, GNorm = 1.4561, lr_0 = 2.8945e-04
Loss = 3.9739e-01, PNorm = 60.7707, GNorm = 1.4879, lr_0 = 2.8925e-04
Loss = 3.4403e-01, PNorm = 60.7775, GNorm = 0.9795, lr_0 = 2.8906e-04
Loss = 3.7514e-01, PNorm = 60.7804, GNorm = 1.3366, lr_0 = 2.8886e-04
Loss = 3.9031e-01, PNorm = 60.7879, GNorm = 1.2670, lr_0 = 2.8866e-04
Loss = 4.3653e-01, PNorm = 60.7906, GNorm = 1.2763, lr_0 = 2.8846e-04
Loss = 3.4746e-01, PNorm = 60.7953, GNorm = 1.3815, lr_0 = 2.8826e-04
Loss = 3.6809e-01, PNorm = 60.8020, GNorm = 1.2262, lr_0 = 2.8807e-04
Loss = 3.5640e-01, PNorm = 60.8074, GNorm = 1.4839, lr_0 = 2.8787e-04
Loss = 4.0163e-01, PNorm = 60.8104, GNorm = 1.9112, lr_0 = 2.8767e-04
Loss = 3.6850e-01, PNorm = 60.8150, GNorm = 2.0012, lr_0 = 2.8748e-04
Loss = 3.5003e-01, PNorm = 60.8162, GNorm = 1.6655, lr_0 = 2.8728e-04
Loss = 3.8420e-01, PNorm = 60.8191, GNorm = 1.3278, lr_0 = 2.8708e-04
Loss = 4.0564e-01, PNorm = 60.8263, GNorm = 1.5451, lr_0 = 2.8689e-04
Loss = 3.5747e-01, PNorm = 60.8294, GNorm = 2.0868, lr_0 = 2.8669e-04
Loss = 3.6053e-01, PNorm = 60.8356, GNorm = 1.3558, lr_0 = 2.8649e-04
Loss = 3.5798e-01, PNorm = 60.8415, GNorm = 2.4965, lr_0 = 2.8630e-04
Loss = 3.4065e-01, PNorm = 60.8462, GNorm = 1.5972, lr_0 = 2.8610e-04
Loss = 3.4255e-01, PNorm = 60.8495, GNorm = 1.5794, lr_0 = 2.8590e-04
Loss = 4.0330e-01, PNorm = 60.8512, GNorm = 1.2891, lr_0 = 2.8571e-04
Loss = 3.6457e-01, PNorm = 60.8537, GNorm = 1.1280, lr_0 = 2.8551e-04
Loss = 3.6435e-01, PNorm = 60.8567, GNorm = 1.0898, lr_0 = 2.8532e-04
Loss = 3.9012e-01, PNorm = 60.8602, GNorm = 1.5794, lr_0 = 2.8512e-04
Loss = 3.3476e-01, PNorm = 60.8692, GNorm = 3.1187, lr_0 = 2.8493e-04
Loss = 3.5663e-01, PNorm = 60.8725, GNorm = 1.0445, lr_0 = 2.8473e-04
Loss = 3.7026e-01, PNorm = 60.8751, GNorm = 1.7471, lr_0 = 2.8454e-04
Loss = 3.5345e-01, PNorm = 60.8797, GNorm = 1.5090, lr_0 = 2.8434e-04
Loss = 3.8477e-01, PNorm = 60.8833, GNorm = 1.3626, lr_0 = 2.8415e-04
Loss = 3.4795e-01, PNorm = 60.8869, GNorm = 1.5869, lr_0 = 2.8395e-04
Loss = 3.4520e-01, PNorm = 60.8858, GNorm = 1.6708, lr_0 = 2.8376e-04
Loss = 4.1090e-01, PNorm = 60.8944, GNorm = 1.5869, lr_0 = 2.8356e-04
Loss = 4.5701e-01, PNorm = 60.9012, GNorm = 1.1626, lr_0 = 2.8337e-04
Loss = 3.6442e-01, PNorm = 60.9098, GNorm = 1.3417, lr_0 = 2.8317e-04
Loss = 3.5526e-01, PNorm = 60.9148, GNorm = 0.9820, lr_0 = 2.8298e-04
Loss = 3.7343e-01, PNorm = 60.9171, GNorm = 1.4731, lr_0 = 2.8279e-04
Loss = 3.7160e-01, PNorm = 60.9206, GNorm = 1.8552, lr_0 = 2.8259e-04
Loss = 3.4244e-01, PNorm = 60.9213, GNorm = 1.0288, lr_0 = 2.8240e-04
Loss = 3.4256e-01, PNorm = 60.9213, GNorm = 1.6799, lr_0 = 2.8221e-04
Loss = 3.8404e-01, PNorm = 60.9259, GNorm = 1.1097, lr_0 = 2.8201e-04
Loss = 3.6612e-01, PNorm = 60.9288, GNorm = 1.1843, lr_0 = 2.8182e-04
Loss = 3.6989e-01, PNorm = 60.9325, GNorm = 2.0949, lr_0 = 2.8163e-04
Loss = 3.9824e-01, PNorm = 60.9424, GNorm = 1.7367, lr_0 = 2.8143e-04
Loss = 3.0187e-01, PNorm = 60.9460, GNorm = 1.3725, lr_0 = 2.8124e-04
Loss = 3.9711e-01, PNorm = 60.9499, GNorm = 0.8793, lr_0 = 2.8105e-04
Loss = 3.4117e-01, PNorm = 60.9553, GNorm = 1.6175, lr_0 = 2.8085e-04
Loss = 4.0495e-01, PNorm = 60.9594, GNorm = 1.4158, lr_0 = 2.8066e-04
Loss = 3.5707e-01, PNorm = 60.9615, GNorm = 1.2720, lr_0 = 2.8047e-04
Loss = 3.3574e-01, PNorm = 60.9616, GNorm = 1.7688, lr_0 = 2.8028e-04
Loss = 3.7101e-01, PNorm = 60.9649, GNorm = 1.5040, lr_0 = 2.8009e-04
Loss = 3.8416e-01, PNorm = 60.9680, GNorm = 1.7008, lr_0 = 2.7989e-04
Loss = 3.3822e-01, PNorm = 60.9706, GNorm = 1.3997, lr_0 = 2.7970e-04
Loss = 3.7535e-01, PNorm = 60.9732, GNorm = 1.9624, lr_0 = 2.7951e-04
Loss = 3.8404e-01, PNorm = 60.9757, GNorm = 2.0900, lr_0 = 2.7932e-04
Loss = 3.6952e-01, PNorm = 60.9814, GNorm = 1.5760, lr_0 = 2.7913e-04
Loss = 3.5883e-01, PNorm = 60.9814, GNorm = 1.2503, lr_0 = 2.7894e-04
Loss = 3.9549e-01, PNorm = 60.9834, GNorm = 1.1382, lr_0 = 2.7875e-04
Loss = 4.3866e-01, PNorm = 60.9868, GNorm = 1.5801, lr_0 = 2.7855e-04
Loss = 3.6931e-01, PNorm = 60.9866, GNorm = 1.2252, lr_0 = 2.7836e-04
Loss = 3.6035e-01, PNorm = 60.9911, GNorm = 1.6538, lr_0 = 2.7817e-04
Loss = 3.7984e-01, PNorm = 60.9990, GNorm = 1.7237, lr_0 = 2.7798e-04
Loss = 3.4420e-01, PNorm = 60.9968, GNorm = 1.5112, lr_0 = 2.7779e-04
Loss = 4.2743e-01, PNorm = 60.9954, GNorm = 1.8246, lr_0 = 2.7760e-04
Loss = 3.7994e-01, PNorm = 61.0000, GNorm = 1.6025, lr_0 = 2.7741e-04
Loss = 3.7327e-01, PNorm = 61.0039, GNorm = 1.6021, lr_0 = 2.7722e-04
Loss = 3.9852e-01, PNorm = 61.0050, GNorm = 1.6203, lr_0 = 2.7703e-04
Loss = 3.9647e-01, PNorm = 61.0073, GNorm = 1.0203, lr_0 = 2.7684e-04
Loss = 3.6692e-01, PNorm = 61.0124, GNorm = 1.2691, lr_0 = 2.7665e-04
Loss = 3.8275e-01, PNorm = 61.0171, GNorm = 1.2422, lr_0 = 2.7646e-04
Loss = 3.4413e-01, PNorm = 61.0166, GNorm = 1.1088, lr_0 = 2.7627e-04
Loss = 4.0166e-01, PNorm = 61.0159, GNorm = 1.5276, lr_0 = 2.7608e-04
Loss = 3.7148e-01, PNorm = 61.0216, GNorm = 1.3918, lr_0 = 2.7590e-04
Loss = 3.9182e-01, PNorm = 61.0246, GNorm = 1.1812, lr_0 = 2.7571e-04
Loss = 3.2277e-01, PNorm = 61.0275, GNorm = 1.1237, lr_0 = 2.7552e-04
Loss = 3.6727e-01, PNorm = 61.0285, GNorm = 1.1189, lr_0 = 2.7533e-04
Loss = 4.4399e-01, PNorm = 61.0322, GNorm = 1.6590, lr_0 = 2.7514e-04
Loss = 3.8269e-01, PNorm = 61.0386, GNorm = 1.6229, lr_0 = 2.7495e-04
Loss = 3.6925e-01, PNorm = 61.0425, GNorm = 0.9767, lr_0 = 2.7476e-04
Loss = 3.8847e-01, PNorm = 61.0421, GNorm = 1.8273, lr_0 = 2.7457e-04
Loss = 4.0411e-01, PNorm = 61.0432, GNorm = 1.2744, lr_0 = 2.7439e-04
Loss = 3.5607e-01, PNorm = 61.0484, GNorm = 1.5451, lr_0 = 2.7420e-04
Loss = 3.9741e-01, PNorm = 61.0516, GNorm = 1.7928, lr_0 = 2.7401e-04
Loss = 3.8067e-01, PNorm = 61.0592, GNorm = 1.6818, lr_0 = 2.7382e-04
Loss = 4.0407e-01, PNorm = 61.0641, GNorm = 2.0911, lr_0 = 2.7364e-04
Loss = 3.8563e-01, PNorm = 61.0675, GNorm = 0.9672, lr_0 = 2.7345e-04
Loss = 4.3253e-01, PNorm = 61.0714, GNorm = 1.9561, lr_0 = 2.7326e-04
Loss = 3.5451e-01, PNorm = 61.0739, GNorm = 1.5729, lr_0 = 2.7307e-04
Loss = 3.8657e-01, PNorm = 61.0781, GNorm = 1.3344, lr_0 = 2.7289e-04
Loss = 4.0560e-01, PNorm = 61.0798, GNorm = 2.1680, lr_0 = 2.7270e-04
Loss = 4.0336e-01, PNorm = 61.0834, GNorm = 1.9463, lr_0 = 2.7251e-04
Loss = 3.3125e-01, PNorm = 61.0897, GNorm = 1.3726, lr_0 = 2.7233e-04
Loss = 3.2860e-01, PNorm = 61.0976, GNorm = 1.3014, lr_0 = 2.7214e-04
Loss = 3.5046e-01, PNorm = 61.1039, GNorm = 1.4298, lr_0 = 2.7195e-04
Loss = 3.5129e-01, PNorm = 61.1069, GNorm = 1.4605, lr_0 = 2.7177e-04
Loss = 4.0501e-01, PNorm = 61.1118, GNorm = 1.2080, lr_0 = 2.7158e-04
Loss = 3.8335e-01, PNorm = 61.1142, GNorm = 1.7418, lr_0 = 2.7139e-04
Loss = 3.8294e-01, PNorm = 61.1163, GNorm = 1.2729, lr_0 = 2.7121e-04
Loss = 3.6963e-01, PNorm = 61.1180, GNorm = 1.2762, lr_0 = 2.7102e-04
Loss = 3.8020e-01, PNorm = 61.1215, GNorm = 1.2414, lr_0 = 2.7084e-04
Loss = 3.8785e-01, PNorm = 61.1231, GNorm = 1.8455, lr_0 = 2.7065e-04
Loss = 3.7143e-01, PNorm = 61.1288, GNorm = 1.5717, lr_0 = 2.7047e-04
Loss = 3.8913e-01, PNorm = 61.1310, GNorm = 1.4746, lr_0 = 2.7028e-04
Loss = 3.4741e-01, PNorm = 61.1342, GNorm = 1.1701, lr_0 = 2.7010e-04
Loss = 3.5535e-01, PNorm = 61.1373, GNorm = 1.4089, lr_0 = 2.6991e-04
Loss = 4.1105e-01, PNorm = 61.1410, GNorm = 1.1934, lr_0 = 2.6973e-04
Loss = 3.6826e-01, PNorm = 61.1463, GNorm = 1.5294, lr_0 = 2.6954e-04
Loss = 3.8989e-01, PNorm = 61.1481, GNorm = 1.7137, lr_0 = 2.6936e-04
Loss = 3.3067e-01, PNorm = 61.1509, GNorm = 0.9039, lr_0 = 2.6917e-04
Loss = 3.5467e-01, PNorm = 61.1548, GNorm = 0.9919, lr_0 = 2.6899e-04
Loss = 4.2912e-01, PNorm = 61.1549, GNorm = 1.6882, lr_0 = 2.6880e-04
Loss = 3.6815e-01, PNorm = 61.1530, GNorm = 2.0236, lr_0 = 2.6862e-04
Loss = 3.4808e-01, PNorm = 61.1541, GNorm = 1.2084, lr_0 = 2.6844e-04
Loss = 3.8759e-01, PNorm = 61.1557, GNorm = 1.7605, lr_0 = 2.6825e-04
Validation mae = 0.113690
Epoch 18
Loss = 3.5521e-01, PNorm = 61.1614, GNorm = 1.7310, lr_0 = 2.6807e-04
Loss = 3.8200e-01, PNorm = 61.1650, GNorm = 1.1462, lr_0 = 2.6788e-04
Loss = 3.4569e-01, PNorm = 61.1678, GNorm = 1.9443, lr_0 = 2.6770e-04
Loss = 3.2428e-01, PNorm = 61.1734, GNorm = 1.1241, lr_0 = 2.6752e-04
Loss = 3.9393e-01, PNorm = 61.1748, GNorm = 1.5725, lr_0 = 2.6733e-04
Loss = 3.7559e-01, PNorm = 61.1823, GNorm = 1.5544, lr_0 = 2.6715e-04
Loss = 4.1481e-01, PNorm = 61.1844, GNorm = 1.4311, lr_0 = 2.6697e-04
Loss = 3.1143e-01, PNorm = 61.1870, GNorm = 1.0271, lr_0 = 2.6678e-04
Loss = 3.9223e-01, PNorm = 61.1891, GNorm = 1.7252, lr_0 = 2.6660e-04
Loss = 3.9755e-01, PNorm = 61.1902, GNorm = 2.0703, lr_0 = 2.6642e-04
Loss = 3.7460e-01, PNorm = 61.1930, GNorm = 1.7446, lr_0 = 2.6624e-04
Loss = 4.1340e-01, PNorm = 61.1957, GNorm = 1.6476, lr_0 = 2.6605e-04
Loss = 3.7584e-01, PNorm = 61.2019, GNorm = 1.5765, lr_0 = 2.6587e-04
Loss = 4.0008e-01, PNorm = 61.2034, GNorm = 1.2948, lr_0 = 2.6569e-04
Loss = 3.7976e-01, PNorm = 61.2058, GNorm = 1.4650, lr_0 = 2.6551e-04
Loss = 4.2387e-01, PNorm = 61.2111, GNorm = 1.3870, lr_0 = 2.6533e-04
Loss = 3.5309e-01, PNorm = 61.2135, GNorm = 1.1452, lr_0 = 2.6514e-04
Loss = 3.7736e-01, PNorm = 61.2144, GNorm = 1.6576, lr_0 = 2.6496e-04
Loss = 3.7781e-01, PNorm = 61.2170, GNorm = 1.6371, lr_0 = 2.6478e-04
Loss = 3.6749e-01, PNorm = 61.2200, GNorm = 1.3672, lr_0 = 2.6460e-04
Loss = 3.7988e-01, PNorm = 61.2249, GNorm = 2.0240, lr_0 = 2.6442e-04
Loss = 3.6220e-01, PNorm = 61.2281, GNorm = 1.1779, lr_0 = 2.6424e-04
Loss = 3.5233e-01, PNorm = 61.2282, GNorm = 1.2951, lr_0 = 2.6406e-04
Loss = 3.8798e-01, PNorm = 61.2291, GNorm = 1.2984, lr_0 = 2.6388e-04
Loss = 3.7328e-01, PNorm = 61.2325, GNorm = 1.1788, lr_0 = 2.6369e-04
Loss = 3.5036e-01, PNorm = 61.2350, GNorm = 1.2642, lr_0 = 2.6351e-04
Loss = 3.6205e-01, PNorm = 61.2391, GNorm = 2.3472, lr_0 = 2.6333e-04
Loss = 3.6168e-01, PNorm = 61.2427, GNorm = 1.4239, lr_0 = 2.6315e-04
Loss = 3.3329e-01, PNorm = 61.2484, GNorm = 0.9954, lr_0 = 2.6297e-04
Loss = 3.5449e-01, PNorm = 61.2530, GNorm = 1.3377, lr_0 = 2.6279e-04
Loss = 3.7167e-01, PNorm = 61.2535, GNorm = 1.2031, lr_0 = 2.6261e-04
Loss = 3.3579e-01, PNorm = 61.2526, GNorm = 1.2028, lr_0 = 2.6243e-04
Loss = 3.4813e-01, PNorm = 61.2521, GNorm = 1.1155, lr_0 = 2.6225e-04
Loss = 3.6950e-01, PNorm = 61.2527, GNorm = 1.2774, lr_0 = 2.6207e-04
Loss = 3.7503e-01, PNorm = 61.2546, GNorm = 1.2763, lr_0 = 2.6189e-04
Loss = 3.4687e-01, PNorm = 61.2613, GNorm = 1.1857, lr_0 = 2.6171e-04
Loss = 4.1161e-01, PNorm = 61.2634, GNorm = 1.6360, lr_0 = 2.6153e-04
Loss = 3.5049e-01, PNorm = 61.2694, GNorm = 1.6309, lr_0 = 2.6136e-04
Loss = 3.6430e-01, PNorm = 61.2747, GNorm = 1.1461, lr_0 = 2.6118e-04
Loss = 3.7515e-01, PNorm = 61.2791, GNorm = 1.4590, lr_0 = 2.6100e-04
Loss = 3.6080e-01, PNorm = 61.2812, GNorm = 1.1774, lr_0 = 2.6082e-04
Loss = 3.7426e-01, PNorm = 61.2823, GNorm = 1.0230, lr_0 = 2.6064e-04
Loss = 3.4852e-01, PNorm = 61.2877, GNorm = 1.2764, lr_0 = 2.6046e-04
Loss = 3.6583e-01, PNorm = 61.2903, GNorm = 1.6495, lr_0 = 2.6028e-04
Loss = 3.6088e-01, PNorm = 61.2947, GNorm = 1.2352, lr_0 = 2.6011e-04
Loss = 4.0422e-01, PNorm = 61.2995, GNorm = 1.9044, lr_0 = 2.5993e-04
Loss = 3.7091e-01, PNorm = 61.3002, GNorm = 1.2779, lr_0 = 2.5975e-04
Loss = 3.7729e-01, PNorm = 61.3010, GNorm = 1.9418, lr_0 = 2.5957e-04
Loss = 3.7572e-01, PNorm = 61.3028, GNorm = 1.3794, lr_0 = 2.5939e-04
Loss = 4.3041e-01, PNorm = 61.3084, GNorm = 1.5045, lr_0 = 2.5922e-04
Loss = 3.5208e-01, PNorm = 61.3162, GNorm = 1.3903, lr_0 = 2.5904e-04
Loss = 3.3958e-01, PNorm = 61.3166, GNorm = 1.5002, lr_0 = 2.5886e-04
Loss = 4.0891e-01, PNorm = 61.3193, GNorm = 1.7455, lr_0 = 2.5868e-04
Loss = 3.7196e-01, PNorm = 61.3258, GNorm = 1.6589, lr_0 = 2.5851e-04
Loss = 3.8024e-01, PNorm = 61.3265, GNorm = 1.5406, lr_0 = 2.5833e-04
Loss = 3.5656e-01, PNorm = 61.3293, GNorm = 1.3704, lr_0 = 2.5815e-04
Loss = 3.6255e-01, PNorm = 61.3292, GNorm = 1.3719, lr_0 = 2.5797e-04
Loss = 3.2986e-01, PNorm = 61.3329, GNorm = 2.2975, lr_0 = 2.5780e-04
Loss = 3.9181e-01, PNorm = 61.3351, GNorm = 1.3669, lr_0 = 2.5762e-04
Loss = 3.0928e-01, PNorm = 61.3368, GNorm = 1.2890, lr_0 = 2.5745e-04
Loss = 3.8146e-01, PNorm = 61.3432, GNorm = 1.1603, lr_0 = 2.5727e-04
Loss = 3.4543e-01, PNorm = 61.3479, GNorm = 1.4995, lr_0 = 2.5709e-04
Loss = 3.7979e-01, PNorm = 61.3507, GNorm = 1.7426, lr_0 = 2.5692e-04
Loss = 4.0980e-01, PNorm = 61.3531, GNorm = 1.6626, lr_0 = 2.5674e-04
Loss = 3.6582e-01, PNorm = 61.3567, GNorm = 1.3420, lr_0 = 2.5656e-04
Loss = 4.2023e-01, PNorm = 61.3642, GNorm = 1.2500, lr_0 = 2.5639e-04
Loss = 3.2484e-01, PNorm = 61.3624, GNorm = 1.4362, lr_0 = 2.5621e-04
Loss = 3.7837e-01, PNorm = 61.3618, GNorm = 1.0097, lr_0 = 2.5604e-04
Loss = 3.8190e-01, PNorm = 61.3638, GNorm = 1.5624, lr_0 = 2.5586e-04
Loss = 4.5255e-01, PNorm = 61.3653, GNorm = 2.4573, lr_0 = 2.5569e-04
Loss = 3.6110e-01, PNorm = 61.3703, GNorm = 2.0760, lr_0 = 2.5551e-04
Loss = 3.5681e-01, PNorm = 61.3695, GNorm = 1.3969, lr_0 = 2.5534e-04
Loss = 3.6257e-01, PNorm = 61.3733, GNorm = 0.9584, lr_0 = 2.5516e-04
Loss = 3.7311e-01, PNorm = 61.3772, GNorm = 1.4362, lr_0 = 2.5499e-04
Loss = 3.6608e-01, PNorm = 61.3807, GNorm = 1.5945, lr_0 = 2.5481e-04
Loss = 4.2629e-01, PNorm = 61.3836, GNorm = 2.3129, lr_0 = 2.5464e-04
Loss = 3.4657e-01, PNorm = 61.3847, GNorm = 1.6117, lr_0 = 2.5446e-04
Loss = 3.7669e-01, PNorm = 61.3891, GNorm = 1.4830, lr_0 = 2.5429e-04
Loss = 3.4244e-01, PNorm = 61.3930, GNorm = 1.3658, lr_0 = 2.5411e-04
Loss = 3.5687e-01, PNorm = 61.3943, GNorm = 1.2190, lr_0 = 2.5394e-04
Loss = 3.6292e-01, PNorm = 61.3968, GNorm = 1.5584, lr_0 = 2.5377e-04
Loss = 3.5917e-01, PNorm = 61.4015, GNorm = 1.2825, lr_0 = 2.5359e-04
Loss = 4.1736e-01, PNorm = 61.4033, GNorm = 2.0767, lr_0 = 2.5342e-04
Loss = 3.6495e-01, PNorm = 61.4041, GNorm = 1.7056, lr_0 = 2.5325e-04
Loss = 3.2888e-01, PNorm = 61.4075, GNorm = 1.2195, lr_0 = 2.5307e-04
Loss = 3.9574e-01, PNorm = 61.4136, GNorm = 1.1324, lr_0 = 2.5290e-04
Loss = 3.7227e-01, PNorm = 61.4164, GNorm = 2.6052, lr_0 = 2.5273e-04
Loss = 4.1682e-01, PNorm = 61.4207, GNorm = 1.5907, lr_0 = 2.5255e-04
Loss = 3.2763e-01, PNorm = 61.4234, GNorm = 1.3452, lr_0 = 2.5238e-04
Loss = 4.0698e-01, PNorm = 61.4269, GNorm = 1.4168, lr_0 = 2.5221e-04
Loss = 3.5375e-01, PNorm = 61.4292, GNorm = 1.2834, lr_0 = 2.5203e-04
Loss = 3.6779e-01, PNorm = 61.4291, GNorm = 2.0528, lr_0 = 2.5186e-04
Loss = 3.3939e-01, PNorm = 61.4330, GNorm = 0.9957, lr_0 = 2.5169e-04
Loss = 3.5675e-01, PNorm = 61.4377, GNorm = 1.5600, lr_0 = 2.5152e-04
Loss = 3.7806e-01, PNorm = 61.4401, GNorm = 1.4015, lr_0 = 2.5134e-04
Loss = 3.8140e-01, PNorm = 61.4436, GNorm = 1.2064, lr_0 = 2.5117e-04
Loss = 3.8205e-01, PNorm = 61.4435, GNorm = 1.0138, lr_0 = 2.5100e-04
Loss = 4.2104e-01, PNorm = 61.4432, GNorm = 1.8280, lr_0 = 2.5083e-04
Loss = 3.5760e-01, PNorm = 61.4483, GNorm = 0.9220, lr_0 = 2.5066e-04
Loss = 3.7504e-01, PNorm = 61.4527, GNorm = 1.4118, lr_0 = 2.5048e-04
Loss = 4.3269e-01, PNorm = 61.4573, GNorm = 1.5261, lr_0 = 2.5031e-04
Loss = 3.8292e-01, PNorm = 61.4596, GNorm = 1.2100, lr_0 = 2.5014e-04
Loss = 3.6731e-01, PNorm = 61.4567, GNorm = 1.0087, lr_0 = 2.4997e-04
Loss = 3.7722e-01, PNorm = 61.4577, GNorm = 2.5445, lr_0 = 2.4980e-04
Loss = 4.0156e-01, PNorm = 61.4622, GNorm = 1.8056, lr_0 = 2.4963e-04
Loss = 3.9084e-01, PNorm = 61.4648, GNorm = 1.3446, lr_0 = 2.4946e-04
Loss = 3.1692e-01, PNorm = 61.4700, GNorm = 1.2875, lr_0 = 2.4929e-04
Loss = 3.2371e-01, PNorm = 61.4716, GNorm = 1.5480, lr_0 = 2.4911e-04
Loss = 4.5465e-01, PNorm = 61.4742, GNorm = 3.7063, lr_0 = 2.4894e-04
Loss = 4.0684e-01, PNorm = 61.4822, GNorm = 1.6619, lr_0 = 2.4877e-04
Loss = 3.2963e-01, PNorm = 61.4867, GNorm = 1.0483, lr_0 = 2.4860e-04
Loss = 3.5931e-01, PNorm = 61.4861, GNorm = 1.2131, lr_0 = 2.4843e-04
Loss = 3.2332e-01, PNorm = 61.4876, GNorm = 1.3596, lr_0 = 2.4826e-04
Loss = 3.8509e-01, PNorm = 61.4909, GNorm = 2.0830, lr_0 = 2.4809e-04
Loss = 3.4367e-01, PNorm = 61.4923, GNorm = 1.3296, lr_0 = 2.4792e-04
Loss = 3.4588e-01, PNorm = 61.4925, GNorm = 1.7161, lr_0 = 2.4775e-04
Loss = 3.6090e-01, PNorm = 61.4930, GNorm = 1.3199, lr_0 = 2.4758e-04
Loss = 3.5245e-01, PNorm = 61.4964, GNorm = 1.2014, lr_0 = 2.4741e-04
Loss = 3.5614e-01, PNorm = 61.5015, GNorm = 1.1781, lr_0 = 2.4724e-04
Loss = 3.5738e-01, PNorm = 61.5105, GNorm = 1.4537, lr_0 = 2.4707e-04
Validation mae = 0.112543
Epoch 19
Loss = 3.9975e-01, PNorm = 61.5171, GNorm = 1.3189, lr_0 = 2.4690e-04
Loss = 3.2679e-01, PNorm = 61.5195, GNorm = 1.4079, lr_0 = 2.4674e-04
Loss = 3.6893e-01, PNorm = 61.5186, GNorm = 1.3768, lr_0 = 2.4657e-04
Loss = 3.3886e-01, PNorm = 61.5200, GNorm = 1.0969, lr_0 = 2.4640e-04
Loss = 3.7152e-01, PNorm = 61.5216, GNorm = 1.3122, lr_0 = 2.4623e-04
Loss = 3.6247e-01, PNorm = 61.5238, GNorm = 1.2706, lr_0 = 2.4606e-04
Loss = 3.5825e-01, PNorm = 61.5236, GNorm = 1.3342, lr_0 = 2.4589e-04
Loss = 3.5075e-01, PNorm = 61.5252, GNorm = 1.5245, lr_0 = 2.4572e-04
Loss = 3.8256e-01, PNorm = 61.5310, GNorm = 0.9818, lr_0 = 2.4556e-04
Loss = 3.8545e-01, PNorm = 61.5369, GNorm = 1.5211, lr_0 = 2.4539e-04
Loss = 3.7726e-01, PNorm = 61.5398, GNorm = 1.0738, lr_0 = 2.4522e-04
Loss = 3.4184e-01, PNorm = 61.5451, GNorm = 1.7928, lr_0 = 2.4505e-04
Loss = 3.8724e-01, PNorm = 61.5489, GNorm = 1.7419, lr_0 = 2.4488e-04
Loss = 3.4978e-01, PNorm = 61.5484, GNorm = 1.2296, lr_0 = 2.4472e-04
Loss = 3.6882e-01, PNorm = 61.5511, GNorm = 1.6664, lr_0 = 2.4455e-04
Loss = 3.9445e-01, PNorm = 61.5562, GNorm = 1.8113, lr_0 = 2.4438e-04
Loss = 3.7926e-01, PNorm = 61.5605, GNorm = 1.9539, lr_0 = 2.4421e-04
Loss = 3.1507e-01, PNorm = 61.5626, GNorm = 1.0168, lr_0 = 2.4405e-04
Loss = 3.3572e-01, PNorm = 61.5662, GNorm = 1.2048, lr_0 = 2.4388e-04
Loss = 3.4983e-01, PNorm = 61.5690, GNorm = 1.2446, lr_0 = 2.4371e-04
Loss = 3.6615e-01, PNorm = 61.5733, GNorm = 1.2745, lr_0 = 2.4354e-04
Loss = 3.5271e-01, PNorm = 61.5785, GNorm = 1.7978, lr_0 = 2.4338e-04
Loss = 3.3538e-01, PNorm = 61.5821, GNorm = 2.0587, lr_0 = 2.4321e-04
Loss = 2.9366e-01, PNorm = 61.5809, GNorm = 1.5056, lr_0 = 2.4304e-04
Loss = 3.7316e-01, PNorm = 61.5805, GNorm = 1.4704, lr_0 = 2.4288e-04
Loss = 3.6798e-01, PNorm = 61.5833, GNorm = 1.1841, lr_0 = 2.4271e-04
Loss = 3.5122e-01, PNorm = 61.5832, GNorm = 1.3886, lr_0 = 2.4254e-04
Loss = 3.6844e-01, PNorm = 61.5872, GNorm = 1.3965, lr_0 = 2.4238e-04
Loss = 3.5887e-01, PNorm = 61.5923, GNorm = 1.2291, lr_0 = 2.4221e-04
Loss = 4.0659e-01, PNorm = 61.5964, GNorm = 1.1874, lr_0 = 2.4205e-04
Loss = 3.3002e-01, PNorm = 61.6016, GNorm = 1.0897, lr_0 = 2.4188e-04
Loss = 3.7759e-01, PNorm = 61.6024, GNorm = 1.2107, lr_0 = 2.4171e-04
Loss = 4.1120e-01, PNorm = 61.6060, GNorm = 1.3431, lr_0 = 2.4155e-04
Loss = 3.9774e-01, PNorm = 61.6109, GNorm = 1.6282, lr_0 = 2.4138e-04
Loss = 4.0361e-01, PNorm = 61.6103, GNorm = 1.0909, lr_0 = 2.4122e-04
Loss = 4.0599e-01, PNorm = 61.6163, GNorm = 1.1179, lr_0 = 2.4105e-04
Loss = 3.3807e-01, PNorm = 61.6243, GNorm = 1.9009, lr_0 = 2.4089e-04
Loss = 3.5321e-01, PNorm = 61.6238, GNorm = 1.5427, lr_0 = 2.4072e-04
Loss = 3.1093e-01, PNorm = 61.6264, GNorm = 1.2935, lr_0 = 2.4056e-04
Loss = 3.5345e-01, PNorm = 61.6303, GNorm = 2.1368, lr_0 = 2.4039e-04
Loss = 3.2931e-01, PNorm = 61.6347, GNorm = 1.0436, lr_0 = 2.4023e-04
Loss = 3.6180e-01, PNorm = 61.6343, GNorm = 1.2490, lr_0 = 2.4006e-04
Loss = 3.8844e-01, PNorm = 61.6389, GNorm = 1.7085, lr_0 = 2.3990e-04
Loss = 3.8316e-01, PNorm = 61.6412, GNorm = 1.3267, lr_0 = 2.3974e-04
Loss = 4.2221e-01, PNorm = 61.6418, GNorm = 1.2394, lr_0 = 2.3957e-04
Loss = 3.6100e-01, PNorm = 61.6439, GNorm = 2.2464, lr_0 = 2.3941e-04
Loss = 3.6201e-01, PNorm = 61.6454, GNorm = 1.2163, lr_0 = 2.3924e-04
Loss = 3.9886e-01, PNorm = 61.6497, GNorm = 1.2062, lr_0 = 2.3908e-04
Loss = 3.9850e-01, PNorm = 61.6530, GNorm = 1.5890, lr_0 = 2.3892e-04
Loss = 3.4807e-01, PNorm = 61.6579, GNorm = 1.4717, lr_0 = 2.3875e-04
Loss = 3.7054e-01, PNorm = 61.6616, GNorm = 1.4740, lr_0 = 2.3859e-04
Loss = 3.6900e-01, PNorm = 61.6672, GNorm = 1.1232, lr_0 = 2.3842e-04
Loss = 3.8686e-01, PNorm = 61.6696, GNorm = 1.6968, lr_0 = 2.3826e-04
Loss = 3.6704e-01, PNorm = 61.6749, GNorm = 1.1983, lr_0 = 2.3810e-04
Loss = 3.7286e-01, PNorm = 61.6795, GNorm = 2.1021, lr_0 = 2.3794e-04
Loss = 3.5078e-01, PNorm = 61.6840, GNorm = 1.3049, lr_0 = 2.3777e-04
Loss = 3.3893e-01, PNorm = 61.6853, GNorm = 1.6772, lr_0 = 2.3761e-04
Loss = 3.4100e-01, PNorm = 61.6874, GNorm = 1.4061, lr_0 = 2.3745e-04
Loss = 3.4934e-01, PNorm = 61.6878, GNorm = 1.4155, lr_0 = 2.3728e-04
Loss = 4.0598e-01, PNorm = 61.6906, GNorm = 1.3977, lr_0 = 2.3712e-04
Loss = 3.3841e-01, PNorm = 61.6910, GNorm = 1.6061, lr_0 = 2.3696e-04
Loss = 3.7632e-01, PNorm = 61.6918, GNorm = 1.7526, lr_0 = 2.3680e-04
Loss = 3.9875e-01, PNorm = 61.6942, GNorm = 1.9898, lr_0 = 2.3663e-04
Loss = 3.4685e-01, PNorm = 61.6943, GNorm = 1.7168, lr_0 = 2.3647e-04
Loss = 3.5417e-01, PNorm = 61.6974, GNorm = 1.1630, lr_0 = 2.3631e-04
Loss = 3.7946e-01, PNorm = 61.7009, GNorm = 1.3478, lr_0 = 2.3615e-04
Loss = 3.6414e-01, PNorm = 61.7035, GNorm = 1.5077, lr_0 = 2.3599e-04
Loss = 3.3977e-01, PNorm = 61.7062, GNorm = 1.6610, lr_0 = 2.3582e-04
Loss = 3.4157e-01, PNorm = 61.7065, GNorm = 1.4880, lr_0 = 2.3566e-04
Loss = 3.4408e-01, PNorm = 61.7062, GNorm = 1.4940, lr_0 = 2.3550e-04
Loss = 3.4538e-01, PNorm = 61.7101, GNorm = 1.3499, lr_0 = 2.3534e-04
Loss = 3.9534e-01, PNorm = 61.7120, GNorm = 1.7196, lr_0 = 2.3518e-04
Loss = 3.4153e-01, PNorm = 61.7143, GNorm = 2.0460, lr_0 = 2.3502e-04
Loss = 3.9004e-01, PNorm = 61.7142, GNorm = 1.5042, lr_0 = 2.3486e-04
Loss = 3.0910e-01, PNorm = 61.7187, GNorm = 1.0605, lr_0 = 2.3470e-04
Loss = 4.1512e-01, PNorm = 61.7219, GNorm = 1.4013, lr_0 = 2.3454e-04
Loss = 3.5646e-01, PNorm = 61.7228, GNorm = 1.8530, lr_0 = 2.3437e-04
Loss = 3.8448e-01, PNorm = 61.7256, GNorm = 1.3682, lr_0 = 2.3421e-04
Loss = 3.7355e-01, PNorm = 61.7286, GNorm = 1.5647, lr_0 = 2.3405e-04
Loss = 3.4474e-01, PNorm = 61.7321, GNorm = 1.9591, lr_0 = 2.3389e-04
Loss = 4.0175e-01, PNorm = 61.7348, GNorm = 1.9432, lr_0 = 2.3373e-04
Loss = 4.1901e-01, PNorm = 61.7375, GNorm = 1.4486, lr_0 = 2.3357e-04
Loss = 3.5426e-01, PNorm = 61.7373, GNorm = 1.1853, lr_0 = 2.3341e-04
Loss = 3.6419e-01, PNorm = 61.7397, GNorm = 1.6542, lr_0 = 2.3325e-04
Loss = 3.7408e-01, PNorm = 61.7431, GNorm = 0.9597, lr_0 = 2.3309e-04
Loss = 3.8653e-01, PNorm = 61.7488, GNorm = 1.7507, lr_0 = 2.3293e-04
Loss = 3.2403e-01, PNorm = 61.7496, GNorm = 1.4347, lr_0 = 2.3277e-04
Loss = 3.8798e-01, PNorm = 61.7500, GNorm = 2.0147, lr_0 = 2.3261e-04
Loss = 4.4784e-01, PNorm = 61.7554, GNorm = 1.2545, lr_0 = 2.3246e-04
Loss = 3.4888e-01, PNorm = 61.7585, GNorm = 1.5957, lr_0 = 2.3230e-04
Loss = 3.4230e-01, PNorm = 61.7574, GNorm = 1.3938, lr_0 = 2.3214e-04
Loss = 3.3155e-01, PNorm = 61.7597, GNorm = 1.2161, lr_0 = 2.3198e-04
Loss = 3.9315e-01, PNorm = 61.7608, GNorm = 1.3121, lr_0 = 2.3182e-04
Loss = 3.7037e-01, PNorm = 61.7613, GNorm = 0.9681, lr_0 = 2.3166e-04
Loss = 3.4370e-01, PNorm = 61.7644, GNorm = 1.8885, lr_0 = 2.3150e-04
Loss = 4.0253e-01, PNorm = 61.7661, GNorm = 1.3592, lr_0 = 2.3134e-04
Loss = 3.8955e-01, PNorm = 61.7678, GNorm = 1.2169, lr_0 = 2.3118e-04
Loss = 3.4429e-01, PNorm = 61.7681, GNorm = 1.7881, lr_0 = 2.3103e-04
Loss = 3.3667e-01, PNorm = 61.7726, GNorm = 0.9354, lr_0 = 2.3087e-04
Loss = 3.6793e-01, PNorm = 61.7779, GNorm = 1.1141, lr_0 = 2.3071e-04
Loss = 3.4526e-01, PNorm = 61.7798, GNorm = 1.1811, lr_0 = 2.3055e-04
Loss = 3.3765e-01, PNorm = 61.7840, GNorm = 1.7382, lr_0 = 2.3039e-04
Loss = 3.9343e-01, PNorm = 61.7857, GNorm = 1.6796, lr_0 = 2.3024e-04
Loss = 3.6070e-01, PNorm = 61.7891, GNorm = 1.9564, lr_0 = 2.3008e-04
Loss = 4.0976e-01, PNorm = 61.7929, GNorm = 1.4994, lr_0 = 2.2992e-04
Loss = 3.4517e-01, PNorm = 61.7955, GNorm = 1.4243, lr_0 = 2.2976e-04
Loss = 4.5124e-01, PNorm = 61.7981, GNorm = 1.6502, lr_0 = 2.2961e-04
Loss = 3.9435e-01, PNorm = 61.8019, GNorm = 1.4668, lr_0 = 2.2945e-04
Loss = 3.0109e-01, PNorm = 61.8016, GNorm = 1.2398, lr_0 = 2.2929e-04
Loss = 3.9128e-01, PNorm = 61.8027, GNorm = 2.0742, lr_0 = 2.2913e-04
Loss = 3.8647e-01, PNorm = 61.8074, GNorm = 1.3493, lr_0 = 2.2898e-04
Loss = 3.9498e-01, PNorm = 61.8104, GNorm = 1.6540, lr_0 = 2.2882e-04
Loss = 3.9298e-01, PNorm = 61.8141, GNorm = 1.1407, lr_0 = 2.2866e-04
Loss = 3.7825e-01, PNorm = 61.8164, GNorm = 1.2818, lr_0 = 2.2851e-04
Loss = 3.8475e-01, PNorm = 61.8180, GNorm = 1.4936, lr_0 = 2.2835e-04
Loss = 3.2525e-01, PNorm = 61.8228, GNorm = 1.3192, lr_0 = 2.2819e-04
Loss = 4.0009e-01, PNorm = 61.8260, GNorm = 1.4912, lr_0 = 2.2804e-04
Loss = 3.4723e-01, PNorm = 61.8292, GNorm = 1.6394, lr_0 = 2.2788e-04
Loss = 3.8969e-01, PNorm = 61.8306, GNorm = 1.2313, lr_0 = 2.2773e-04
Loss = 4.0800e-01, PNorm = 61.8304, GNorm = 1.0471, lr_0 = 2.2757e-04
Validation mae = 0.111930
Epoch 20
Loss = 4.0031e-01, PNorm = 61.8314, GNorm = 1.1948, lr_0 = 2.2741e-04
Loss = 3.7409e-01, PNorm = 61.8312, GNorm = 1.1385, lr_0 = 2.2726e-04
Loss = 3.5871e-01, PNorm = 61.8370, GNorm = 1.8624, lr_0 = 2.2710e-04
Loss = 3.0353e-01, PNorm = 61.8402, GNorm = 1.2562, lr_0 = 2.2695e-04
Loss = 3.3235e-01, PNorm = 61.8426, GNorm = 1.1818, lr_0 = 2.2679e-04
Loss = 3.8361e-01, PNorm = 61.8459, GNorm = 1.4632, lr_0 = 2.2664e-04
Loss = 3.2431e-01, PNorm = 61.8493, GNorm = 1.2322, lr_0 = 2.2648e-04
Loss = 2.9811e-01, PNorm = 61.8490, GNorm = 1.3127, lr_0 = 2.2632e-04
Loss = 3.3775e-01, PNorm = 61.8482, GNorm = 1.2501, lr_0 = 2.2617e-04
Loss = 3.1224e-01, PNorm = 61.8508, GNorm = 1.6731, lr_0 = 2.2601e-04
Loss = 3.4262e-01, PNorm = 61.8514, GNorm = 1.1905, lr_0 = 2.2586e-04
Loss = 3.4849e-01, PNorm = 61.8530, GNorm = 1.4485, lr_0 = 2.2571e-04
Loss = 3.4659e-01, PNorm = 61.8536, GNorm = 1.3469, lr_0 = 2.2555e-04
Loss = 4.1388e-01, PNorm = 61.8587, GNorm = 2.5355, lr_0 = 2.2540e-04
Loss = 3.5636e-01, PNorm = 61.8631, GNorm = 1.4662, lr_0 = 2.2524e-04
Loss = 4.2537e-01, PNorm = 61.8654, GNorm = 1.6412, lr_0 = 2.2509e-04
Loss = 3.4667e-01, PNorm = 61.8689, GNorm = 1.8236, lr_0 = 2.2493e-04
Loss = 3.7262e-01, PNorm = 61.8717, GNorm = 1.7290, lr_0 = 2.2478e-04
Loss = 3.8044e-01, PNorm = 61.8752, GNorm = 1.5231, lr_0 = 2.2463e-04
Loss = 3.4654e-01, PNorm = 61.8765, GNorm = 1.4284, lr_0 = 2.2447e-04
Loss = 4.2339e-01, PNorm = 61.8753, GNorm = 1.6179, lr_0 = 2.2432e-04
Loss = 3.4078e-01, PNorm = 61.8774, GNorm = 1.1949, lr_0 = 2.2416e-04
Loss = 3.7841e-01, PNorm = 61.8810, GNorm = 1.4579, lr_0 = 2.2401e-04
Loss = 3.7000e-01, PNorm = 61.8840, GNorm = 1.7177, lr_0 = 2.2386e-04
Loss = 3.4736e-01, PNorm = 61.8864, GNorm = 1.2596, lr_0 = 2.2370e-04
Loss = 3.4638e-01, PNorm = 61.8933, GNorm = 1.5614, lr_0 = 2.2355e-04
Loss = 3.4818e-01, PNorm = 61.8959, GNorm = 1.9660, lr_0 = 2.2340e-04
Loss = 3.5542e-01, PNorm = 61.8973, GNorm = 1.5636, lr_0 = 2.2324e-04
Loss = 3.4745e-01, PNorm = 61.8989, GNorm = 1.7814, lr_0 = 2.2309e-04
Loss = 3.6879e-01, PNorm = 61.9023, GNorm = 1.3096, lr_0 = 2.2294e-04
Loss = 3.3345e-01, PNorm = 61.9051, GNorm = 1.1598, lr_0 = 2.2279e-04
Loss = 3.8870e-01, PNorm = 61.9052, GNorm = 1.8187, lr_0 = 2.2263e-04
Loss = 3.2149e-01, PNorm = 61.9097, GNorm = 1.5510, lr_0 = 2.2248e-04
Loss = 4.0520e-01, PNorm = 61.9110, GNorm = 1.2962, lr_0 = 2.2233e-04
Loss = 3.4667e-01, PNorm = 61.9149, GNorm = 1.6011, lr_0 = 2.2218e-04
Loss = 3.6991e-01, PNorm = 61.9155, GNorm = 1.2767, lr_0 = 2.2202e-04
Loss = 3.2377e-01, PNorm = 61.9152, GNorm = 1.2097, lr_0 = 2.2187e-04
Loss = 3.3633e-01, PNorm = 61.9180, GNorm = 1.2701, lr_0 = 2.2172e-04
Loss = 3.6269e-01, PNorm = 61.9207, GNorm = 1.8538, lr_0 = 2.2157e-04
Loss = 3.8168e-01, PNorm = 61.9236, GNorm = 1.9938, lr_0 = 2.2142e-04
Loss = 3.9381e-01, PNorm = 61.9273, GNorm = 1.7011, lr_0 = 2.2126e-04
Loss = 3.6311e-01, PNorm = 61.9307, GNorm = 2.0157, lr_0 = 2.2111e-04
Loss = 3.5508e-01, PNorm = 61.9343, GNorm = 1.2416, lr_0 = 2.2096e-04
Loss = 3.4895e-01, PNorm = 61.9383, GNorm = 1.8399, lr_0 = 2.2081e-04
Loss = 3.8081e-01, PNorm = 61.9384, GNorm = 1.1667, lr_0 = 2.2066e-04
Loss = 3.8128e-01, PNorm = 61.9406, GNorm = 1.5215, lr_0 = 2.2051e-04
Loss = 3.9063e-01, PNorm = 61.9420, GNorm = 1.3488, lr_0 = 2.2036e-04
Loss = 3.9283e-01, PNorm = 61.9416, GNorm = 1.2992, lr_0 = 2.2021e-04
Loss = 3.2874e-01, PNorm = 61.9439, GNorm = 1.4258, lr_0 = 2.2005e-04
Loss = 4.0793e-01, PNorm = 61.9489, GNorm = 2.2996, lr_0 = 2.1990e-04
Loss = 3.2909e-01, PNorm = 61.9523, GNorm = 1.0780, lr_0 = 2.1975e-04
Loss = 3.8972e-01, PNorm = 61.9559, GNorm = 1.2624, lr_0 = 2.1960e-04
Loss = 3.9161e-01, PNorm = 61.9570, GNorm = 1.4508, lr_0 = 2.1945e-04
Loss = 3.2194e-01, PNorm = 61.9585, GNorm = 1.2475, lr_0 = 2.1930e-04
Loss = 4.0700e-01, PNorm = 61.9634, GNorm = 1.9264, lr_0 = 2.1915e-04
Loss = 3.7362e-01, PNorm = 61.9635, GNorm = 1.4978, lr_0 = 2.1900e-04
Loss = 3.3743e-01, PNorm = 61.9647, GNorm = 1.2518, lr_0 = 2.1885e-04
Loss = 3.6461e-01, PNorm = 61.9673, GNorm = 1.4647, lr_0 = 2.1870e-04
Loss = 3.9799e-01, PNorm = 61.9701, GNorm = 1.1784, lr_0 = 2.1855e-04
Loss = 4.0257e-01, PNorm = 61.9730, GNorm = 2.0022, lr_0 = 2.1840e-04
Loss = 3.8514e-01, PNorm = 61.9762, GNorm = 1.6332, lr_0 = 2.1825e-04
Loss = 3.1513e-01, PNorm = 61.9798, GNorm = 1.4463, lr_0 = 2.1810e-04
Loss = 3.5609e-01, PNorm = 61.9803, GNorm = 1.1290, lr_0 = 2.1795e-04
Loss = 3.8137e-01, PNorm = 61.9836, GNorm = 1.3759, lr_0 = 2.1780e-04
Loss = 3.7173e-01, PNorm = 61.9856, GNorm = 1.5386, lr_0 = 2.1765e-04
Loss = 3.4115e-01, PNorm = 61.9865, GNorm = 1.4148, lr_0 = 2.1751e-04
Loss = 3.5891e-01, PNorm = 61.9875, GNorm = 2.0487, lr_0 = 2.1736e-04
Loss = 3.6003e-01, PNorm = 61.9870, GNorm = 1.5058, lr_0 = 2.1721e-04
Loss = 4.0943e-01, PNorm = 61.9900, GNorm = 1.0509, lr_0 = 2.1706e-04
Loss = 4.2910e-01, PNorm = 61.9942, GNorm = 1.8724, lr_0 = 2.1691e-04
Loss = 3.7583e-01, PNorm = 61.9984, GNorm = 1.4874, lr_0 = 2.1676e-04
Loss = 3.2787e-01, PNorm = 61.9999, GNorm = 1.3826, lr_0 = 2.1661e-04
Loss = 3.7981e-01, PNorm = 62.0018, GNorm = 1.3753, lr_0 = 2.1646e-04
Loss = 3.9138e-01, PNorm = 62.0066, GNorm = 1.3507, lr_0 = 2.1632e-04
Loss = 3.6807e-01, PNorm = 62.0087, GNorm = 1.7988, lr_0 = 2.1617e-04
Loss = 3.5337e-01, PNorm = 62.0116, GNorm = 1.0481, lr_0 = 2.1602e-04
Loss = 3.9522e-01, PNorm = 62.0108, GNorm = 1.4321, lr_0 = 2.1587e-04
Loss = 3.7549e-01, PNorm = 62.0111, GNorm = 1.4858, lr_0 = 2.1572e-04
Loss = 3.9913e-01, PNorm = 62.0147, GNorm = 1.3357, lr_0 = 2.1558e-04
Loss = 3.9026e-01, PNorm = 62.0208, GNorm = 1.2297, lr_0 = 2.1543e-04
Loss = 3.1921e-01, PNorm = 62.0240, GNorm = 1.4842, lr_0 = 2.1528e-04
Loss = 4.0336e-01, PNorm = 62.0240, GNorm = 1.9370, lr_0 = 2.1513e-04
Loss = 3.5527e-01, PNorm = 62.0272, GNorm = 1.3831, lr_0 = 2.1499e-04
Loss = 3.9471e-01, PNorm = 62.0300, GNorm = 1.3947, lr_0 = 2.1484e-04
Loss = 3.6586e-01, PNorm = 62.0337, GNorm = 0.9474, lr_0 = 2.1469e-04
Loss = 4.0632e-01, PNorm = 62.0334, GNorm = 1.5956, lr_0 = 2.1454e-04
Loss = 2.9975e-01, PNorm = 62.0339, GNorm = 1.5306, lr_0 = 2.1440e-04
Loss = 3.4959e-01, PNorm = 62.0336, GNorm = 1.2709, lr_0 = 2.1425e-04
Loss = 3.2405e-01, PNorm = 62.0372, GNorm = 0.9502, lr_0 = 2.1410e-04
Loss = 3.3978e-01, PNorm = 62.0397, GNorm = 1.4716, lr_0 = 2.1396e-04
Loss = 3.6900e-01, PNorm = 62.0398, GNorm = 1.3674, lr_0 = 2.1381e-04
Loss = 3.4430e-01, PNorm = 62.0444, GNorm = 1.4890, lr_0 = 2.1366e-04
Loss = 3.7350e-01, PNorm = 62.0465, GNorm = 1.3085, lr_0 = 2.1352e-04
Loss = 4.0015e-01, PNorm = 62.0464, GNorm = 2.3093, lr_0 = 2.1337e-04
Loss = 4.1160e-01, PNorm = 62.0498, GNorm = 1.4922, lr_0 = 2.1323e-04
Loss = 3.2885e-01, PNorm = 62.0526, GNorm = 2.2593, lr_0 = 2.1308e-04
Loss = 3.6492e-01, PNorm = 62.0513, GNorm = 1.3243, lr_0 = 2.1293e-04
Loss = 3.5675e-01, PNorm = 62.0531, GNorm = 1.9345, lr_0 = 2.1279e-04
Loss = 4.2657e-01, PNorm = 62.0575, GNorm = 1.5113, lr_0 = 2.1264e-04
Loss = 3.4153e-01, PNorm = 62.0591, GNorm = 1.8669, lr_0 = 2.1250e-04
Loss = 3.6849e-01, PNorm = 62.0619, GNorm = 1.3655, lr_0 = 2.1235e-04
Loss = 3.3604e-01, PNorm = 62.0643, GNorm = 1.1084, lr_0 = 2.1221e-04
Loss = 3.1486e-01, PNorm = 62.0672, GNorm = 1.1648, lr_0 = 2.1206e-04
Loss = 3.9842e-01, PNorm = 62.0680, GNorm = 1.2469, lr_0 = 2.1191e-04
Loss = 3.6480e-01, PNorm = 62.0695, GNorm = 2.1914, lr_0 = 2.1177e-04
Loss = 3.3621e-01, PNorm = 62.0760, GNorm = 1.4561, lr_0 = 2.1162e-04
Loss = 3.0597e-01, PNorm = 62.0778, GNorm = 1.3866, lr_0 = 2.1148e-04
Loss = 3.6589e-01, PNorm = 62.0799, GNorm = 2.0959, lr_0 = 2.1133e-04
Loss = 3.9247e-01, PNorm = 62.0804, GNorm = 1.2413, lr_0 = 2.1119e-04
Loss = 3.7959e-01, PNorm = 62.0823, GNorm = 1.5263, lr_0 = 2.1104e-04
Loss = 3.9112e-01, PNorm = 62.0852, GNorm = 1.2972, lr_0 = 2.1090e-04
Loss = 3.8218e-01, PNorm = 62.0870, GNorm = 1.5297, lr_0 = 2.1076e-04
Loss = 3.5835e-01, PNorm = 62.0902, GNorm = 1.4258, lr_0 = 2.1061e-04
Loss = 3.8306e-01, PNorm = 62.0928, GNorm = 1.3222, lr_0 = 2.1047e-04
Loss = 3.2510e-01, PNorm = 62.0944, GNorm = 1.3056, lr_0 = 2.1032e-04
Loss = 3.3560e-01, PNorm = 62.0966, GNorm = 1.6919, lr_0 = 2.1018e-04
Loss = 3.5670e-01, PNorm = 62.0961, GNorm = 1.1841, lr_0 = 2.1003e-04
Loss = 3.4842e-01, PNorm = 62.0991, GNorm = 1.0848, lr_0 = 2.0989e-04
Loss = 3.5079e-01, PNorm = 62.1046, GNorm = 1.4042, lr_0 = 2.0975e-04
Loss = 4.1754e-01, PNorm = 62.1058, GNorm = 2.3506, lr_0 = 2.0960e-04
Validation mae = 0.112197
Epoch 21
Loss = 3.3057e-01, PNorm = 62.1052, GNorm = 1.0540, lr_0 = 2.0946e-04
Loss = 3.8390e-01, PNorm = 62.1079, GNorm = 1.1374, lr_0 = 2.0932e-04
Loss = 3.4941e-01, PNorm = 62.1099, GNorm = 1.2341, lr_0 = 2.0917e-04
Loss = 3.5278e-01, PNorm = 62.1129, GNorm = 1.1104, lr_0 = 2.0903e-04
Loss = 3.5234e-01, PNorm = 62.1142, GNorm = 1.7454, lr_0 = 2.0889e-04
Loss = 3.8232e-01, PNorm = 62.1189, GNorm = 1.9790, lr_0 = 2.0874e-04
Loss = 3.8094e-01, PNorm = 62.1225, GNorm = 1.4312, lr_0 = 2.0860e-04
Loss = 3.3430e-01, PNorm = 62.1255, GNorm = 1.5504, lr_0 = 2.0846e-04
Loss = 3.4799e-01, PNorm = 62.1259, GNorm = 1.6616, lr_0 = 2.0831e-04
Loss = 3.5946e-01, PNorm = 62.1297, GNorm = 1.3825, lr_0 = 2.0817e-04
Loss = 3.0552e-01, PNorm = 62.1333, GNorm = 1.4381, lr_0 = 2.0803e-04
Loss = 3.8308e-01, PNorm = 62.1332, GNorm = 1.9139, lr_0 = 2.0789e-04
Loss = 3.2464e-01, PNorm = 62.1370, GNorm = 1.6572, lr_0 = 2.0774e-04
Loss = 3.2925e-01, PNorm = 62.1386, GNorm = 1.5241, lr_0 = 2.0760e-04
Loss = 3.5809e-01, PNorm = 62.1399, GNorm = 1.6244, lr_0 = 2.0746e-04
Loss = 4.0472e-01, PNorm = 62.1426, GNorm = 1.4884, lr_0 = 2.0732e-04
Loss = 4.1486e-01, PNorm = 62.1425, GNorm = 1.5079, lr_0 = 2.0718e-04
Loss = 3.7181e-01, PNorm = 62.1468, GNorm = 1.1798, lr_0 = 2.0703e-04
Loss = 3.5039e-01, PNorm = 62.1500, GNorm = 1.2949, lr_0 = 2.0689e-04
Loss = 3.4609e-01, PNorm = 62.1512, GNorm = 1.2435, lr_0 = 2.0675e-04
Loss = 3.5536e-01, PNorm = 62.1526, GNorm = 1.9967, lr_0 = 2.0661e-04
Loss = 3.6743e-01, PNorm = 62.1581, GNorm = 1.5866, lr_0 = 2.0647e-04
Loss = 3.6364e-01, PNorm = 62.1587, GNorm = 1.5554, lr_0 = 2.0633e-04
Loss = 4.1347e-01, PNorm = 62.1647, GNorm = 2.4065, lr_0 = 2.0618e-04
Loss = 3.6587e-01, PNorm = 62.1676, GNorm = 1.6011, lr_0 = 2.0604e-04
Loss = 3.7779e-01, PNorm = 62.1715, GNorm = 1.8414, lr_0 = 2.0590e-04
Loss = 3.2792e-01, PNorm = 62.1745, GNorm = 1.3229, lr_0 = 2.0576e-04
Loss = 3.1156e-01, PNorm = 62.1762, GNorm = 1.5297, lr_0 = 2.0562e-04
Loss = 3.7382e-01, PNorm = 62.1745, GNorm = 1.4818, lr_0 = 2.0548e-04
Loss = 3.5852e-01, PNorm = 62.1743, GNorm = 1.1670, lr_0 = 2.0534e-04
Loss = 3.6630e-01, PNorm = 62.1766, GNorm = 1.2325, lr_0 = 2.0520e-04
Loss = 3.7326e-01, PNorm = 62.1765, GNorm = 1.5057, lr_0 = 2.0506e-04
Loss = 3.9310e-01, PNorm = 62.1782, GNorm = 1.0436, lr_0 = 2.0492e-04
Loss = 3.5064e-01, PNorm = 62.1815, GNorm = 1.3858, lr_0 = 2.0478e-04
Loss = 3.7647e-01, PNorm = 62.1839, GNorm = 1.4765, lr_0 = 2.0464e-04
Loss = 3.6490e-01, PNorm = 62.1862, GNorm = 1.0918, lr_0 = 2.0450e-04
Loss = 3.7873e-01, PNorm = 62.1880, GNorm = 1.1934, lr_0 = 2.0436e-04
Loss = 4.1011e-01, PNorm = 62.1890, GNorm = 2.0069, lr_0 = 2.0422e-04
Loss = 4.0518e-01, PNorm = 62.1905, GNorm = 1.2011, lr_0 = 2.0408e-04
Loss = 4.0490e-01, PNorm = 62.1927, GNorm = 1.7616, lr_0 = 2.0394e-04
Loss = 3.4027e-01, PNorm = 62.1960, GNorm = 1.8922, lr_0 = 2.0380e-04
Loss = 3.5324e-01, PNorm = 62.1995, GNorm = 1.2833, lr_0 = 2.0366e-04
Loss = 3.7538e-01, PNorm = 62.2052, GNorm = 2.7871, lr_0 = 2.0352e-04
Loss = 3.4409e-01, PNorm = 62.2081, GNorm = 1.8919, lr_0 = 2.0338e-04
Loss = 3.8294e-01, PNorm = 62.2063, GNorm = 1.8453, lr_0 = 2.0324e-04
Loss = 3.7632e-01, PNorm = 62.2062, GNorm = 1.2934, lr_0 = 2.0310e-04
Loss = 3.7824e-01, PNorm = 62.2090, GNorm = 1.2823, lr_0 = 2.0296e-04
Loss = 3.4315e-01, PNorm = 62.2117, GNorm = 1.6049, lr_0 = 2.0282e-04
Loss = 3.6630e-01, PNorm = 62.2129, GNorm = 1.0086, lr_0 = 2.0268e-04
Loss = 3.3330e-01, PNorm = 62.2140, GNorm = 1.4533, lr_0 = 2.0254e-04
Loss = 3.6914e-01, PNorm = 62.2164, GNorm = 1.5951, lr_0 = 2.0240e-04
Loss = 3.4633e-01, PNorm = 62.2190, GNorm = 1.2175, lr_0 = 2.0227e-04
Loss = 3.6062e-01, PNorm = 62.2217, GNorm = 1.2201, lr_0 = 2.0213e-04
Loss = 3.7213e-01, PNorm = 62.2253, GNorm = 2.2878, lr_0 = 2.0199e-04
Loss = 3.6837e-01, PNorm = 62.2294, GNorm = 1.0611, lr_0 = 2.0185e-04
Loss = 3.8221e-01, PNorm = 62.2298, GNorm = 1.8236, lr_0 = 2.0171e-04
Loss = 3.7925e-01, PNorm = 62.2343, GNorm = 1.2414, lr_0 = 2.0157e-04
Loss = 3.4198e-01, PNorm = 62.2373, GNorm = 1.3779, lr_0 = 2.0144e-04
Loss = 3.2070e-01, PNorm = 62.2377, GNorm = 1.3442, lr_0 = 2.0130e-04
Loss = 3.8445e-01, PNorm = 62.2396, GNorm = 1.2450, lr_0 = 2.0116e-04
Loss = 3.4602e-01, PNorm = 62.2422, GNorm = 1.1749, lr_0 = 2.0102e-04
Loss = 3.9197e-01, PNorm = 62.2472, GNorm = 1.5539, lr_0 = 2.0088e-04
Loss = 3.2010e-01, PNorm = 62.2501, GNorm = 1.2403, lr_0 = 2.0075e-04
Loss = 3.1760e-01, PNorm = 62.2515, GNorm = 1.4354, lr_0 = 2.0061e-04
Loss = 2.7750e-01, PNorm = 62.2545, GNorm = 1.1891, lr_0 = 2.0047e-04
Loss = 3.3654e-01, PNorm = 62.2561, GNorm = 1.4287, lr_0 = 2.0033e-04
Loss = 3.4777e-01, PNorm = 62.2560, GNorm = 1.5414, lr_0 = 2.0020e-04
Loss = 3.4450e-01, PNorm = 62.2588, GNorm = 1.6274, lr_0 = 2.0006e-04
Loss = 3.5582e-01, PNorm = 62.2580, GNorm = 1.5864, lr_0 = 1.9992e-04
Loss = 3.6698e-01, PNorm = 62.2575, GNorm = 2.1554, lr_0 = 1.9979e-04
Loss = 3.3293e-01, PNorm = 62.2617, GNorm = 1.1900, lr_0 = 1.9965e-04
Loss = 3.7017e-01, PNorm = 62.2623, GNorm = 1.3220, lr_0 = 1.9951e-04
Loss = 3.8223e-01, PNorm = 62.2663, GNorm = 1.7165, lr_0 = 1.9938e-04
Loss = 3.5691e-01, PNorm = 62.2702, GNorm = 1.2848, lr_0 = 1.9924e-04
Loss = 3.1739e-01, PNorm = 62.2721, GNorm = 1.4151, lr_0 = 1.9910e-04
Loss = 3.3149e-01, PNorm = 62.2735, GNorm = 1.7116, lr_0 = 1.9897e-04
Loss = 3.6262e-01, PNorm = 62.2740, GNorm = 1.6831, lr_0 = 1.9883e-04
Loss = 3.7352e-01, PNorm = 62.2749, GNorm = 1.6057, lr_0 = 1.9869e-04
Loss = 4.1272e-01, PNorm = 62.2745, GNorm = 1.6283, lr_0 = 1.9856e-04
Loss = 3.4961e-01, PNorm = 62.2763, GNorm = 1.3550, lr_0 = 1.9842e-04
Loss = 3.6004e-01, PNorm = 62.2786, GNorm = 0.8054, lr_0 = 1.9829e-04
Loss = 4.0450e-01, PNorm = 62.2797, GNorm = 1.3615, lr_0 = 1.9815e-04
Loss = 3.7918e-01, PNorm = 62.2820, GNorm = 1.4096, lr_0 = 1.9801e-04
Loss = 3.9395e-01, PNorm = 62.2830, GNorm = 1.9431, lr_0 = 1.9788e-04
Loss = 4.1975e-01, PNorm = 62.2840, GNorm = 1.6706, lr_0 = 1.9774e-04
Loss = 4.3800e-01, PNorm = 62.2857, GNorm = 1.6432, lr_0 = 1.9761e-04
Loss = 3.5894e-01, PNorm = 62.2862, GNorm = 1.6022, lr_0 = 1.9747e-04
Loss = 3.5883e-01, PNorm = 62.2910, GNorm = 1.9246, lr_0 = 1.9734e-04
Loss = 3.5647e-01, PNorm = 62.2916, GNorm = 1.4151, lr_0 = 1.9720e-04
Loss = 3.8771e-01, PNorm = 62.2914, GNorm = 1.3427, lr_0 = 1.9707e-04
Loss = 3.8281e-01, PNorm = 62.2929, GNorm = 1.5349, lr_0 = 1.9693e-04
Loss = 3.5793e-01, PNorm = 62.2939, GNorm = 1.7259, lr_0 = 1.9680e-04
Loss = 3.0009e-01, PNorm = 62.2930, GNorm = 1.0831, lr_0 = 1.9666e-04
Loss = 3.7652e-01, PNorm = 62.2931, GNorm = 1.0730, lr_0 = 1.9653e-04
Loss = 4.3178e-01, PNorm = 62.2950, GNorm = 1.6734, lr_0 = 1.9639e-04
Loss = 3.5183e-01, PNorm = 62.2984, GNorm = 1.3366, lr_0 = 1.9626e-04
Loss = 3.2769e-01, PNorm = 62.3032, GNorm = 1.2880, lr_0 = 1.9612e-04
Loss = 3.3775e-01, PNorm = 62.3036, GNorm = 1.9655, lr_0 = 1.9599e-04
Loss = 3.7706e-01, PNorm = 62.3066, GNorm = 1.0189, lr_0 = 1.9585e-04
Loss = 3.5086e-01, PNorm = 62.3098, GNorm = 1.5810, lr_0 = 1.9572e-04
Loss = 3.6097e-01, PNorm = 62.3113, GNorm = 1.6715, lr_0 = 1.9559e-04
Loss = 3.3506e-01, PNorm = 62.3133, GNorm = 1.4050, lr_0 = 1.9545e-04
Loss = 3.7672e-01, PNorm = 62.3155, GNorm = 1.2711, lr_0 = 1.9532e-04
Loss = 3.2633e-01, PNorm = 62.3188, GNorm = 1.4265, lr_0 = 1.9518e-04
Loss = 3.6905e-01, PNorm = 62.3200, GNorm = 1.3103, lr_0 = 1.9505e-04
Loss = 4.0293e-01, PNorm = 62.3206, GNorm = 1.5649, lr_0 = 1.9492e-04
Loss = 3.3404e-01, PNorm = 62.3231, GNorm = 1.7893, lr_0 = 1.9478e-04
Loss = 3.5244e-01, PNorm = 62.3235, GNorm = 1.7127, lr_0 = 1.9465e-04
Loss = 3.7554e-01, PNorm = 62.3252, GNorm = 1.4037, lr_0 = 1.9452e-04
Loss = 3.5100e-01, PNorm = 62.3311, GNorm = 1.5732, lr_0 = 1.9438e-04
Loss = 3.6822e-01, PNorm = 62.3375, GNorm = 1.0034, lr_0 = 1.9425e-04
Loss = 3.7947e-01, PNorm = 62.3381, GNorm = 1.4454, lr_0 = 1.9412e-04
Loss = 3.1686e-01, PNorm = 62.3408, GNorm = 0.9822, lr_0 = 1.9398e-04
Loss = 3.8435e-01, PNorm = 62.3416, GNorm = 1.4639, lr_0 = 1.9385e-04
Loss = 3.6548e-01, PNorm = 62.3419, GNorm = 1.7480, lr_0 = 1.9372e-04
Loss = 3.2916e-01, PNorm = 62.3439, GNorm = 1.8629, lr_0 = 1.9359e-04
Loss = 3.3818e-01, PNorm = 62.3439, GNorm = 1.3031, lr_0 = 1.9345e-04
Loss = 4.2376e-01, PNorm = 62.3454, GNorm = 1.2946, lr_0 = 1.9332e-04
Loss = 3.5711e-01, PNorm = 62.3498, GNorm = 1.2796, lr_0 = 1.9319e-04
Loss = 3.3761e-01, PNorm = 62.3523, GNorm = 1.9568, lr_0 = 1.9306e-04
Validation mae = 0.111764
Epoch 22
Loss = 3.5635e-01, PNorm = 62.3523, GNorm = 1.3110, lr_0 = 1.9292e-04
Loss = 3.4600e-01, PNorm = 62.3541, GNorm = 1.6164, lr_0 = 1.9279e-04
Loss = 3.3237e-01, PNorm = 62.3582, GNorm = 1.0108, lr_0 = 1.9266e-04
Loss = 3.5885e-01, PNorm = 62.3594, GNorm = 1.2772, lr_0 = 1.9253e-04
Loss = 3.5471e-01, PNorm = 62.3624, GNorm = 1.4438, lr_0 = 1.9240e-04
Loss = 3.1636e-01, PNorm = 62.3661, GNorm = 1.1605, lr_0 = 1.9226e-04
Loss = 3.2052e-01, PNorm = 62.3679, GNorm = 1.3456, lr_0 = 1.9213e-04
Loss = 3.2498e-01, PNorm = 62.3708, GNorm = 1.6346, lr_0 = 1.9200e-04
Loss = 3.1476e-01, PNorm = 62.3746, GNorm = 1.2113, lr_0 = 1.9187e-04
Loss = 3.2687e-01, PNorm = 62.3758, GNorm = 1.2264, lr_0 = 1.9174e-04
Loss = 3.4262e-01, PNorm = 62.3767, GNorm = 1.7110, lr_0 = 1.9161e-04
Loss = 3.2146e-01, PNorm = 62.3783, GNorm = 1.5179, lr_0 = 1.9148e-04
Loss = 3.5147e-01, PNorm = 62.3799, GNorm = 0.8812, lr_0 = 1.9134e-04
Loss = 3.8025e-01, PNorm = 62.3819, GNorm = 1.7116, lr_0 = 1.9121e-04
Loss = 3.5601e-01, PNorm = 62.3844, GNorm = 1.3971, lr_0 = 1.9108e-04
Loss = 3.3364e-01, PNorm = 62.3865, GNorm = 0.9953, lr_0 = 1.9095e-04
Loss = 2.9909e-01, PNorm = 62.3861, GNorm = 1.2332, lr_0 = 1.9082e-04
Loss = 3.6338e-01, PNorm = 62.3854, GNorm = 1.2897, lr_0 = 1.9069e-04
Loss = 3.6973e-01, PNorm = 62.3892, GNorm = 1.6456, lr_0 = 1.9056e-04
Loss = 3.7790e-01, PNorm = 62.3921, GNorm = 1.8450, lr_0 = 1.9043e-04
Loss = 3.5724e-01, PNorm = 62.3943, GNorm = 2.3993, lr_0 = 1.9030e-04
Loss = 3.3578e-01, PNorm = 62.3974, GNorm = 1.5126, lr_0 = 1.9017e-04
Loss = 3.7007e-01, PNorm = 62.3998, GNorm = 1.1484, lr_0 = 1.9004e-04
Loss = 3.3201e-01, PNorm = 62.4009, GNorm = 1.3620, lr_0 = 1.8991e-04
Loss = 3.4732e-01, PNorm = 62.4015, GNorm = 1.3059, lr_0 = 1.8978e-04
Loss = 3.1864e-01, PNorm = 62.4032, GNorm = 1.3207, lr_0 = 1.8965e-04
Loss = 3.5331e-01, PNorm = 62.4056, GNorm = 1.4173, lr_0 = 1.8952e-04
Loss = 3.6305e-01, PNorm = 62.4073, GNorm = 1.8976, lr_0 = 1.8939e-04
Loss = 3.0902e-01, PNorm = 62.4095, GNorm = 1.2044, lr_0 = 1.8926e-04
Loss = 3.7035e-01, PNorm = 62.4112, GNorm = 1.3197, lr_0 = 1.8913e-04
Loss = 3.6029e-01, PNorm = 62.4137, GNorm = 0.7684, lr_0 = 1.8900e-04
Loss = 3.4817e-01, PNorm = 62.4153, GNorm = 1.2518, lr_0 = 1.8887e-04
Loss = 3.7501e-01, PNorm = 62.4178, GNorm = 1.9752, lr_0 = 1.8874e-04
Loss = 3.3567e-01, PNorm = 62.4172, GNorm = 1.1042, lr_0 = 1.8861e-04
Loss = 3.7082e-01, PNorm = 62.4182, GNorm = 1.5641, lr_0 = 1.8848e-04
Loss = 3.6620e-01, PNorm = 62.4214, GNorm = 1.6459, lr_0 = 1.8835e-04
Loss = 3.9650e-01, PNorm = 62.4240, GNorm = 1.6434, lr_0 = 1.8822e-04
Loss = 3.8483e-01, PNorm = 62.4279, GNorm = 1.4645, lr_0 = 1.8809e-04
Loss = 2.9316e-01, PNorm = 62.4318, GNorm = 1.7256, lr_0 = 1.8797e-04
Loss = 3.2907e-01, PNorm = 62.4367, GNorm = 2.0906, lr_0 = 1.8784e-04
Loss = 3.2079e-01, PNorm = 62.4379, GNorm = 1.8365, lr_0 = 1.8771e-04
Loss = 3.4913e-01, PNorm = 62.4395, GNorm = 1.6389, lr_0 = 1.8758e-04
Loss = 4.1456e-01, PNorm = 62.4429, GNorm = 1.3984, lr_0 = 1.8745e-04
Loss = 3.5070e-01, PNorm = 62.4443, GNorm = 2.5412, lr_0 = 1.8732e-04
Loss = 3.6753e-01, PNorm = 62.4452, GNorm = 2.1425, lr_0 = 1.8719e-04
Loss = 3.5257e-01, PNorm = 62.4469, GNorm = 1.3234, lr_0 = 1.8707e-04
Loss = 3.5990e-01, PNorm = 62.4477, GNorm = 1.4320, lr_0 = 1.8694e-04
Loss = 3.2835e-01, PNorm = 62.4512, GNorm = 1.5910, lr_0 = 1.8681e-04
Loss = 3.0741e-01, PNorm = 62.4551, GNorm = 1.9139, lr_0 = 1.8668e-04
Loss = 4.4280e-01, PNorm = 62.4602, GNorm = 1.6088, lr_0 = 1.8655e-04
Loss = 3.6332e-01, PNorm = 62.4597, GNorm = 1.3919, lr_0 = 1.8643e-04
Loss = 3.5839e-01, PNorm = 62.4604, GNorm = 1.5268, lr_0 = 1.8630e-04
Loss = 3.4649e-01, PNorm = 62.4631, GNorm = 1.4239, lr_0 = 1.8617e-04
Loss = 3.3755e-01, PNorm = 62.4647, GNorm = 1.2532, lr_0 = 1.8604e-04
Loss = 4.1302e-01, PNorm = 62.4610, GNorm = 2.0662, lr_0 = 1.8592e-04
Loss = 4.0012e-01, PNorm = 62.4625, GNorm = 1.6137, lr_0 = 1.8579e-04
Loss = 3.9153e-01, PNorm = 62.4650, GNorm = 1.9256, lr_0 = 1.8566e-04
Loss = 3.6841e-01, PNorm = 62.4667, GNorm = 1.4904, lr_0 = 1.8553e-04
Loss = 3.0597e-01, PNorm = 62.4682, GNorm = 1.4907, lr_0 = 1.8541e-04
Loss = 2.9721e-01, PNorm = 62.4701, GNorm = 1.3132, lr_0 = 1.8528e-04
Loss = 3.1367e-01, PNorm = 62.4730, GNorm = 1.4459, lr_0 = 1.8515e-04
Loss = 3.3892e-01, PNorm = 62.4756, GNorm = 2.0509, lr_0 = 1.8503e-04
Loss = 3.4366e-01, PNorm = 62.4760, GNorm = 1.4585, lr_0 = 1.8490e-04
Loss = 3.6701e-01, PNorm = 62.4788, GNorm = 2.1526, lr_0 = 1.8477e-04
Loss = 4.0301e-01, PNorm = 62.4796, GNorm = 1.8613, lr_0 = 1.8465e-04
Loss = 3.6392e-01, PNorm = 62.4805, GNorm = 1.3944, lr_0 = 1.8452e-04
Loss = 3.3361e-01, PNorm = 62.4814, GNorm = 1.0568, lr_0 = 1.8439e-04
Loss = 3.4008e-01, PNorm = 62.4859, GNorm = 1.4501, lr_0 = 1.8427e-04
Loss = 4.0923e-01, PNorm = 62.4887, GNorm = 1.0269, lr_0 = 1.8414e-04
Loss = 3.2620e-01, PNorm = 62.4894, GNorm = 1.5832, lr_0 = 1.8401e-04
Loss = 3.7012e-01, PNorm = 62.4910, GNorm = 1.3038, lr_0 = 1.8389e-04
Loss = 3.4490e-01, PNorm = 62.4943, GNorm = 1.4212, lr_0 = 1.8376e-04
Loss = 3.5430e-01, PNorm = 62.4968, GNorm = 2.5151, lr_0 = 1.8364e-04
Loss = 3.2998e-01, PNorm = 62.4980, GNorm = 1.2110, lr_0 = 1.8351e-04
Loss = 3.5876e-01, PNorm = 62.4995, GNorm = 1.3722, lr_0 = 1.8338e-04
Loss = 3.7880e-01, PNorm = 62.5002, GNorm = 1.4371, lr_0 = 1.8326e-04
Loss = 3.6475e-01, PNorm = 62.5025, GNorm = 1.3833, lr_0 = 1.8313e-04
Loss = 3.5046e-01, PNorm = 62.5032, GNorm = 1.4511, lr_0 = 1.8301e-04
Loss = 3.6859e-01, PNorm = 62.5022, GNorm = 1.4401, lr_0 = 1.8288e-04
Loss = 4.3268e-01, PNorm = 62.5022, GNorm = 1.3641, lr_0 = 1.8276e-04
Loss = 3.9022e-01, PNorm = 62.5062, GNorm = 1.8798, lr_0 = 1.8263e-04
Loss = 3.7928e-01, PNorm = 62.5098, GNorm = 2.3913, lr_0 = 1.8251e-04
Loss = 3.8231e-01, PNorm = 62.5118, GNorm = 2.1118, lr_0 = 1.8238e-04
Loss = 3.1770e-01, PNorm = 62.5126, GNorm = 1.5816, lr_0 = 1.8226e-04
Loss = 3.9114e-01, PNorm = 62.5122, GNorm = 1.1593, lr_0 = 1.8213e-04
Loss = 3.7593e-01, PNorm = 62.5145, GNorm = 1.2668, lr_0 = 1.8201e-04
Loss = 4.0475e-01, PNorm = 62.5193, GNorm = 1.6048, lr_0 = 1.8188e-04
Loss = 3.2857e-01, PNorm = 62.5225, GNorm = 1.4775, lr_0 = 1.8176e-04
Loss = 3.6798e-01, PNorm = 62.5237, GNorm = 2.7911, lr_0 = 1.8163e-04
Loss = 3.1374e-01, PNorm = 62.5267, GNorm = 1.3628, lr_0 = 1.8151e-04
Loss = 3.5433e-01, PNorm = 62.5255, GNorm = 1.2793, lr_0 = 1.8138e-04
Loss = 3.7010e-01, PNorm = 62.5268, GNorm = 1.4589, lr_0 = 1.8126e-04
Loss = 3.3995e-01, PNorm = 62.5284, GNorm = 1.4809, lr_0 = 1.8114e-04
Loss = 3.8156e-01, PNorm = 62.5290, GNorm = 2.1478, lr_0 = 1.8101e-04
Loss = 4.0154e-01, PNorm = 62.5301, GNorm = 1.5308, lr_0 = 1.8089e-04
Loss = 3.7360e-01, PNorm = 62.5328, GNorm = 1.6505, lr_0 = 1.8076e-04
Loss = 3.6550e-01, PNorm = 62.5361, GNorm = 1.5428, lr_0 = 1.8064e-04
Loss = 3.1884e-01, PNorm = 62.5388, GNorm = 1.7076, lr_0 = 1.8052e-04
Loss = 3.3173e-01, PNorm = 62.5403, GNorm = 1.2311, lr_0 = 1.8039e-04
Loss = 3.4485e-01, PNorm = 62.5398, GNorm = 1.6261, lr_0 = 1.8027e-04
Loss = 3.8354e-01, PNorm = 62.5406, GNorm = 1.5210, lr_0 = 1.8015e-04
Loss = 3.4284e-01, PNorm = 62.5406, GNorm = 1.6471, lr_0 = 1.8002e-04
Loss = 3.6701e-01, PNorm = 62.5437, GNorm = 1.3542, lr_0 = 1.7990e-04
Loss = 4.0038e-01, PNorm = 62.5449, GNorm = 1.3810, lr_0 = 1.7978e-04
Loss = 3.4056e-01, PNorm = 62.5454, GNorm = 1.7497, lr_0 = 1.7965e-04
Loss = 3.9408e-01, PNorm = 62.5493, GNorm = 1.3997, lr_0 = 1.7953e-04
Loss = 3.6372e-01, PNorm = 62.5535, GNorm = 1.1397, lr_0 = 1.7941e-04
Loss = 3.3998e-01, PNorm = 62.5559, GNorm = 1.4593, lr_0 = 1.7928e-04
Loss = 3.3340e-01, PNorm = 62.5565, GNorm = 1.8449, lr_0 = 1.7916e-04
Loss = 4.0629e-01, PNorm = 62.5565, GNorm = 1.8117, lr_0 = 1.7904e-04
Loss = 3.5306e-01, PNorm = 62.5586, GNorm = 1.3286, lr_0 = 1.7892e-04
Loss = 3.5135e-01, PNorm = 62.5592, GNorm = 2.0603, lr_0 = 1.7879e-04
Loss = 3.2393e-01, PNorm = 62.5603, GNorm = 1.8513, lr_0 = 1.7867e-04
Loss = 3.3257e-01, PNorm = 62.5617, GNorm = 1.4917, lr_0 = 1.7855e-04
Loss = 3.7106e-01, PNorm = 62.5638, GNorm = 1.3614, lr_0 = 1.7843e-04
Loss = 3.6773e-01, PNorm = 62.5655, GNorm = 1.9942, lr_0 = 1.7830e-04
Loss = 3.7734e-01, PNorm = 62.5666, GNorm = 1.7514, lr_0 = 1.7818e-04
Loss = 3.8195e-01, PNorm = 62.5701, GNorm = 1.7174, lr_0 = 1.7806e-04
Loss = 3.6571e-01, PNorm = 62.5710, GNorm = 1.7367, lr_0 = 1.7794e-04
Loss = 3.3753e-01, PNorm = 62.5707, GNorm = 1.6601, lr_0 = 1.7782e-04
Validation mae = 0.112160
Epoch 23
Loss = 3.4077e-01, PNorm = 62.5734, GNorm = 1.2598, lr_0 = 1.7769e-04
Loss = 3.0504e-01, PNorm = 62.5719, GNorm = 1.2337, lr_0 = 1.7757e-04
Loss = 3.9831e-01, PNorm = 62.5711, GNorm = 1.0470, lr_0 = 1.7745e-04
Loss = 4.0070e-01, PNorm = 62.5702, GNorm = 1.5226, lr_0 = 1.7733e-04
Loss = 3.3195e-01, PNorm = 62.5702, GNorm = 1.5125, lr_0 = 1.7721e-04
Loss = 3.4493e-01, PNorm = 62.5731, GNorm = 2.2291, lr_0 = 1.7709e-04
Loss = 3.2994e-01, PNorm = 62.5748, GNorm = 1.1450, lr_0 = 1.7696e-04
Loss = 3.6541e-01, PNorm = 62.5792, GNorm = 2.1349, lr_0 = 1.7684e-04
Loss = 3.2772e-01, PNorm = 62.5819, GNorm = 1.7020, lr_0 = 1.7672e-04
Loss = 3.7961e-01, PNorm = 62.5841, GNorm = 1.2394, lr_0 = 1.7660e-04
Loss = 3.1745e-01, PNorm = 62.5855, GNorm = 1.3929, lr_0 = 1.7648e-04
Loss = 3.6560e-01, PNorm = 62.5875, GNorm = 1.7234, lr_0 = 1.7636e-04
Loss = 3.3254e-01, PNorm = 62.5906, GNorm = 1.7874, lr_0 = 1.7624e-04
Loss = 4.0923e-01, PNorm = 62.5910, GNorm = 1.6062, lr_0 = 1.7612e-04
Loss = 3.0637e-01, PNorm = 62.5914, GNorm = 1.3288, lr_0 = 1.7600e-04
Loss = 3.5528e-01, PNorm = 62.5923, GNorm = 1.5064, lr_0 = 1.7588e-04
Loss = 3.6683e-01, PNorm = 62.5940, GNorm = 1.3514, lr_0 = 1.7576e-04
Loss = 3.2126e-01, PNorm = 62.5960, GNorm = 1.7243, lr_0 = 1.7564e-04
Loss = 3.3716e-01, PNorm = 62.5990, GNorm = 1.7307, lr_0 = 1.7552e-04
Loss = 3.6105e-01, PNorm = 62.6004, GNorm = 1.5373, lr_0 = 1.7540e-04
Loss = 3.2123e-01, PNorm = 62.5991, GNorm = 1.4614, lr_0 = 1.7528e-04
Loss = 3.5013e-01, PNorm = 62.5996, GNorm = 1.3756, lr_0 = 1.7516e-04
Loss = 2.8855e-01, PNorm = 62.6021, GNorm = 1.1989, lr_0 = 1.7504e-04
Loss = 3.3182e-01, PNorm = 62.6047, GNorm = 1.2517, lr_0 = 1.7492e-04
Loss = 3.4387e-01, PNorm = 62.6075, GNorm = 2.1854, lr_0 = 1.7480e-04
Loss = 3.2612e-01, PNorm = 62.6108, GNorm = 1.4846, lr_0 = 1.7468e-04
Loss = 3.6152e-01, PNorm = 62.6118, GNorm = 1.3851, lr_0 = 1.7456e-04
Loss = 3.6791e-01, PNorm = 62.6113, GNorm = 1.4765, lr_0 = 1.7444e-04
Loss = 3.6232e-01, PNorm = 62.6133, GNorm = 2.4522, lr_0 = 1.7432e-04
Loss = 3.5695e-01, PNorm = 62.6135, GNorm = 1.4770, lr_0 = 1.7420e-04
Loss = 3.6825e-01, PNorm = 62.6155, GNorm = 2.3676, lr_0 = 1.7408e-04
Loss = 3.3523e-01, PNorm = 62.6177, GNorm = 1.9817, lr_0 = 1.7396e-04
Loss = 3.8609e-01, PNorm = 62.6198, GNorm = 1.2693, lr_0 = 1.7384e-04
Loss = 3.5377e-01, PNorm = 62.6219, GNorm = 1.6920, lr_0 = 1.7372e-04
Loss = 4.8695e-01, PNorm = 62.6251, GNorm = 2.5457, lr_0 = 1.7360e-04
Loss = 3.2202e-01, PNorm = 62.6286, GNorm = 1.0470, lr_0 = 1.7348e-04
Loss = 3.6252e-01, PNorm = 62.6316, GNorm = 1.5126, lr_0 = 1.7336e-04
Loss = 4.1937e-01, PNorm = 62.6325, GNorm = 2.0302, lr_0 = 1.7325e-04
Loss = 3.1187e-01, PNorm = 62.6357, GNorm = 1.3873, lr_0 = 1.7313e-04
Loss = 3.2837e-01, PNorm = 62.6405, GNorm = 1.1886, lr_0 = 1.7301e-04
Loss = 3.6848e-01, PNorm = 62.6431, GNorm = 1.3918, lr_0 = 1.7289e-04
Loss = 3.3920e-01, PNorm = 62.6439, GNorm = 1.5625, lr_0 = 1.7277e-04
Loss = 3.6569e-01, PNorm = 62.6470, GNorm = 1.5842, lr_0 = 1.7265e-04
Loss = 3.7461e-01, PNorm = 62.6505, GNorm = 2.3595, lr_0 = 1.7253e-04
Loss = 3.7749e-01, PNorm = 62.6505, GNorm = 1.4627, lr_0 = 1.7242e-04
Loss = 3.3424e-01, PNorm = 62.6531, GNorm = 1.3056, lr_0 = 1.7230e-04
Loss = 3.7159e-01, PNorm = 62.6535, GNorm = 1.6382, lr_0 = 1.7218e-04
Loss = 3.3402e-01, PNorm = 62.6558, GNorm = 1.4203, lr_0 = 1.7206e-04
Loss = 3.6385e-01, PNorm = 62.6607, GNorm = 1.4279, lr_0 = 1.7194e-04
Loss = 3.2465e-01, PNorm = 62.6630, GNorm = 1.7468, lr_0 = 1.7183e-04
Loss = 3.2726e-01, PNorm = 62.6647, GNorm = 1.6538, lr_0 = 1.7171e-04
Loss = 3.4696e-01, PNorm = 62.6649, GNorm = 0.9714, lr_0 = 1.7159e-04
Loss = 4.0094e-01, PNorm = 62.6696, GNorm = 1.6459, lr_0 = 1.7147e-04
Loss = 3.8436e-01, PNorm = 62.6719, GNorm = 1.8761, lr_0 = 1.7136e-04
Loss = 3.4929e-01, PNorm = 62.6720, GNorm = 1.8281, lr_0 = 1.7124e-04
Loss = 3.0807e-01, PNorm = 62.6729, GNorm = 1.1601, lr_0 = 1.7112e-04
Loss = 3.5080e-01, PNorm = 62.6741, GNorm = 1.4477, lr_0 = 1.7100e-04
Loss = 3.2576e-01, PNorm = 62.6759, GNorm = 1.1221, lr_0 = 1.7089e-04
Loss = 3.8294e-01, PNorm = 62.6773, GNorm = 1.7453, lr_0 = 1.7077e-04
Loss = 3.3763e-01, PNorm = 62.6820, GNorm = 1.4847, lr_0 = 1.7065e-04
Loss = 3.1483e-01, PNorm = 62.6851, GNorm = 1.9562, lr_0 = 1.7054e-04
Loss = 3.1168e-01, PNorm = 62.6863, GNorm = 1.2685, lr_0 = 1.7042e-04
Loss = 4.1104e-01, PNorm = 62.6857, GNorm = 2.1105, lr_0 = 1.7030e-04
Loss = 3.3777e-01, PNorm = 62.6874, GNorm = 1.6750, lr_0 = 1.7019e-04
Loss = 3.4889e-01, PNorm = 62.6891, GNorm = 1.8785, lr_0 = 1.7007e-04
Loss = 3.6054e-01, PNorm = 62.6891, GNorm = 1.6913, lr_0 = 1.6995e-04
Loss = 3.6759e-01, PNorm = 62.6901, GNorm = 1.4077, lr_0 = 1.6984e-04
Loss = 3.4872e-01, PNorm = 62.6931, GNorm = 1.8777, lr_0 = 1.6972e-04
Loss = 3.3788e-01, PNorm = 62.6958, GNorm = 1.5833, lr_0 = 1.6960e-04
Loss = 3.5290e-01, PNorm = 62.6996, GNorm = 1.3114, lr_0 = 1.6949e-04
Loss = 3.2663e-01, PNorm = 62.7001, GNorm = 1.4040, lr_0 = 1.6937e-04
Loss = 3.7895e-01, PNorm = 62.7017, GNorm = 1.4866, lr_0 = 1.6926e-04
Loss = 3.5068e-01, PNorm = 62.7039, GNorm = 1.5974, lr_0 = 1.6914e-04
Loss = 3.8670e-01, PNorm = 62.7042, GNorm = 1.2069, lr_0 = 1.6902e-04
Loss = 3.3976e-01, PNorm = 62.7071, GNorm = 1.2315, lr_0 = 1.6891e-04
Loss = 3.3406e-01, PNorm = 62.7099, GNorm = 1.5298, lr_0 = 1.6879e-04
Loss = 3.6344e-01, PNorm = 62.7139, GNorm = 1.0818, lr_0 = 1.6868e-04
Loss = 3.6082e-01, PNorm = 62.7170, GNorm = 1.8092, lr_0 = 1.6856e-04
Loss = 3.7091e-01, PNorm = 62.7165, GNorm = 1.4979, lr_0 = 1.6845e-04
Loss = 3.7587e-01, PNorm = 62.7183, GNorm = 1.1646, lr_0 = 1.6833e-04
Loss = 3.3709e-01, PNorm = 62.7221, GNorm = 1.7855, lr_0 = 1.6821e-04
Loss = 3.2795e-01, PNorm = 62.7254, GNorm = 1.1933, lr_0 = 1.6810e-04
Loss = 3.6488e-01, PNorm = 62.7263, GNorm = 1.3591, lr_0 = 1.6798e-04
Loss = 3.6179e-01, PNorm = 62.7280, GNorm = 1.4546, lr_0 = 1.6787e-04
Loss = 4.3472e-01, PNorm = 62.7270, GNorm = 1.9180, lr_0 = 1.6775e-04
Loss = 3.4333e-01, PNorm = 62.7289, GNorm = 1.0716, lr_0 = 1.6764e-04
Loss = 3.4634e-01, PNorm = 62.7301, GNorm = 2.4344, lr_0 = 1.6752e-04
Loss = 3.4103e-01, PNorm = 62.7295, GNorm = 1.1484, lr_0 = 1.6741e-04
Loss = 3.4169e-01, PNorm = 62.7308, GNorm = 1.9626, lr_0 = 1.6729e-04
Loss = 3.8480e-01, PNorm = 62.7330, GNorm = 1.4831, lr_0 = 1.6718e-04
Loss = 3.4049e-01, PNorm = 62.7333, GNorm = 1.9343, lr_0 = 1.6707e-04
Loss = 3.7487e-01, PNorm = 62.7323, GNorm = 1.8715, lr_0 = 1.6695e-04
Loss = 3.3897e-01, PNorm = 62.7345, GNorm = 1.4995, lr_0 = 1.6684e-04
Loss = 4.2007e-01, PNorm = 62.7366, GNorm = 1.1437, lr_0 = 1.6672e-04
Loss = 4.0125e-01, PNorm = 62.7369, GNorm = 1.3013, lr_0 = 1.6661e-04
Loss = 3.2701e-01, PNorm = 62.7415, GNorm = 1.5891, lr_0 = 1.6649e-04
Loss = 3.9361e-01, PNorm = 62.7450, GNorm = 1.2779, lr_0 = 1.6638e-04
Loss = 3.9271e-01, PNorm = 62.7451, GNorm = 1.8185, lr_0 = 1.6627e-04
Loss = 3.6373e-01, PNorm = 62.7474, GNorm = 1.5914, lr_0 = 1.6615e-04
Loss = 3.3251e-01, PNorm = 62.7496, GNorm = 1.0417, lr_0 = 1.6604e-04
Loss = 3.5492e-01, PNorm = 62.7513, GNorm = 1.5826, lr_0 = 1.6592e-04
Loss = 3.7926e-01, PNorm = 62.7526, GNorm = 1.6933, lr_0 = 1.6581e-04
Loss = 3.9748e-01, PNorm = 62.7550, GNorm = 1.7467, lr_0 = 1.6570e-04
Loss = 3.7078e-01, PNorm = 62.7565, GNorm = 1.9544, lr_0 = 1.6558e-04
Loss = 3.7550e-01, PNorm = 62.7578, GNorm = 1.2607, lr_0 = 1.6547e-04
Loss = 3.5520e-01, PNorm = 62.7574, GNorm = 1.4011, lr_0 = 1.6536e-04
Loss = 3.7845e-01, PNorm = 62.7598, GNorm = 1.6159, lr_0 = 1.6524e-04
Loss = 3.7748e-01, PNorm = 62.7585, GNorm = 1.0924, lr_0 = 1.6513e-04
Loss = 4.3283e-01, PNorm = 62.7602, GNorm = 1.3238, lr_0 = 1.6502e-04
Loss = 3.6459e-01, PNorm = 62.7629, GNorm = 1.6819, lr_0 = 1.6490e-04
Loss = 3.4423e-01, PNorm = 62.7640, GNorm = 1.3471, lr_0 = 1.6479e-04
Loss = 3.2426e-01, PNorm = 62.7684, GNorm = 1.4227, lr_0 = 1.6468e-04
Loss = 3.4075e-01, PNorm = 62.7707, GNorm = 1.8319, lr_0 = 1.6457e-04
Loss = 3.3053e-01, PNorm = 62.7696, GNorm = 1.5346, lr_0 = 1.6445e-04
Loss = 4.2929e-01, PNorm = 62.7700, GNorm = 2.0242, lr_0 = 1.6434e-04
Loss = 3.2767e-01, PNorm = 62.7731, GNorm = 1.6523, lr_0 = 1.6423e-04
Loss = 3.3820e-01, PNorm = 62.7738, GNorm = 0.9055, lr_0 = 1.6412e-04
Loss = 3.6359e-01, PNorm = 62.7750, GNorm = 1.1776, lr_0 = 1.6400e-04
Loss = 3.7505e-01, PNorm = 62.7767, GNorm = 1.3109, lr_0 = 1.6389e-04
Loss = 3.7506e-01, PNorm = 62.7811, GNorm = 1.3889, lr_0 = 1.6378e-04
Validation mae = 0.110904
Epoch 24
Loss = 3.4797e-01, PNorm = 62.7841, GNorm = 1.5311, lr_0 = 1.6367e-04
Loss = 3.4298e-01, PNorm = 62.7848, GNorm = 1.6630, lr_0 = 1.6355e-04
Loss = 3.4007e-01, PNorm = 62.7849, GNorm = 1.6988, lr_0 = 1.6344e-04
Loss = 3.4248e-01, PNorm = 62.7844, GNorm = 1.8128, lr_0 = 1.6333e-04
Loss = 3.9713e-01, PNorm = 62.7854, GNorm = 1.3597, lr_0 = 1.6322e-04
Loss = 3.1342e-01, PNorm = 62.7867, GNorm = 1.1862, lr_0 = 1.6311e-04
Loss = 2.6967e-01, PNorm = 62.7889, GNorm = 1.3426, lr_0 = 1.6299e-04
Loss = 3.3533e-01, PNorm = 62.7890, GNorm = 1.3372, lr_0 = 1.6288e-04
Loss = 3.3435e-01, PNorm = 62.7918, GNorm = 1.5190, lr_0 = 1.6277e-04
Loss = 3.2987e-01, PNorm = 62.7933, GNorm = 1.9380, lr_0 = 1.6266e-04
Loss = 2.7837e-01, PNorm = 62.7929, GNorm = 1.3370, lr_0 = 1.6255e-04
Loss = 3.9138e-01, PNorm = 62.7954, GNorm = 1.3983, lr_0 = 1.6244e-04
Loss = 3.3817e-01, PNorm = 62.7972, GNorm = 1.5806, lr_0 = 1.6233e-04
Loss = 3.4221e-01, PNorm = 62.8008, GNorm = 1.3448, lr_0 = 1.6221e-04
Loss = 3.1056e-01, PNorm = 62.8029, GNorm = 1.3504, lr_0 = 1.6210e-04
Loss = 3.2947e-01, PNorm = 62.8053, GNorm = 1.1045, lr_0 = 1.6199e-04
Loss = 3.5537e-01, PNorm = 62.8049, GNorm = 1.8150, lr_0 = 1.6188e-04
Loss = 3.3440e-01, PNorm = 62.8045, GNorm = 1.4662, lr_0 = 1.6177e-04
Loss = 3.4291e-01, PNorm = 62.8062, GNorm = 1.2873, lr_0 = 1.6166e-04
Loss = 3.9431e-01, PNorm = 62.8082, GNorm = 1.3555, lr_0 = 1.6155e-04
Loss = 3.0898e-01, PNorm = 62.8119, GNorm = 1.2993, lr_0 = 1.6144e-04
Loss = 3.2801e-01, PNorm = 62.8117, GNorm = 1.4370, lr_0 = 1.6133e-04
Loss = 3.9700e-01, PNorm = 62.8109, GNorm = 1.3782, lr_0 = 1.6122e-04
Loss = 3.4678e-01, PNorm = 62.8127, GNorm = 2.0324, lr_0 = 1.6111e-04
Loss = 3.3344e-01, PNorm = 62.8140, GNorm = 1.4254, lr_0 = 1.6100e-04
Loss = 4.0118e-01, PNorm = 62.8141, GNorm = 1.3708, lr_0 = 1.6089e-04
Loss = 3.4973e-01, PNorm = 62.8148, GNorm = 1.1375, lr_0 = 1.6078e-04
Loss = 3.3802e-01, PNorm = 62.8168, GNorm = 1.3220, lr_0 = 1.6067e-04
Loss = 3.7361e-01, PNorm = 62.8191, GNorm = 1.6123, lr_0 = 1.6056e-04
Loss = 3.7458e-01, PNorm = 62.8225, GNorm = 1.2965, lr_0 = 1.6045e-04
Loss = 3.3133e-01, PNorm = 62.8255, GNorm = 1.1430, lr_0 = 1.6034e-04
Loss = 3.7749e-01, PNorm = 62.8244, GNorm = 1.9618, lr_0 = 1.6023e-04
Loss = 3.6656e-01, PNorm = 62.8255, GNorm = 1.5939, lr_0 = 1.6012e-04
Loss = 3.1321e-01, PNorm = 62.8276, GNorm = 2.1439, lr_0 = 1.6001e-04
Loss = 3.0985e-01, PNorm = 62.8290, GNorm = 1.6184, lr_0 = 1.5990e-04
Loss = 3.6674e-01, PNorm = 62.8311, GNorm = 1.2721, lr_0 = 1.5979e-04
Loss = 3.8466e-01, PNorm = 62.8339, GNorm = 1.7124, lr_0 = 1.5968e-04
Loss = 3.4977e-01, PNorm = 62.8365, GNorm = 1.6560, lr_0 = 1.5957e-04
Loss = 3.5573e-01, PNorm = 62.8354, GNorm = 1.8667, lr_0 = 1.5946e-04
Loss = 3.3291e-01, PNorm = 62.8356, GNorm = 1.4878, lr_0 = 1.5935e-04
Loss = 3.6483e-01, PNorm = 62.8397, GNorm = 1.7423, lr_0 = 1.5924e-04
Loss = 4.1273e-01, PNorm = 62.8433, GNorm = 1.2444, lr_0 = 1.5913e-04
Loss = 3.4977e-01, PNorm = 62.8444, GNorm = 2.0341, lr_0 = 1.5902e-04
Loss = 3.3071e-01, PNorm = 62.8468, GNorm = 1.3178, lr_0 = 1.5891e-04
Loss = 3.8053e-01, PNorm = 62.8485, GNorm = 2.0473, lr_0 = 1.5880e-04
Loss = 3.0998e-01, PNorm = 62.8496, GNorm = 1.2444, lr_0 = 1.5870e-04
Loss = 3.8608e-01, PNorm = 62.8512, GNorm = 1.3133, lr_0 = 1.5859e-04
Loss = 3.7467e-01, PNorm = 62.8538, GNorm = 1.3620, lr_0 = 1.5848e-04
Loss = 3.4956e-01, PNorm = 62.8567, GNorm = 1.5373, lr_0 = 1.5837e-04
Loss = 3.3135e-01, PNorm = 62.8560, GNorm = 1.0449, lr_0 = 1.5826e-04
Loss = 3.5894e-01, PNorm = 62.8552, GNorm = 1.5296, lr_0 = 1.5815e-04
Loss = 3.6167e-01, PNorm = 62.8557, GNorm = 1.4153, lr_0 = 1.5804e-04
Loss = 2.9376e-01, PNorm = 62.8594, GNorm = 1.2936, lr_0 = 1.5794e-04
Loss = 3.3941e-01, PNorm = 62.8614, GNorm = 1.3526, lr_0 = 1.5783e-04
Loss = 3.5546e-01, PNorm = 62.8620, GNorm = 1.1742, lr_0 = 1.5772e-04
Loss = 3.4340e-01, PNorm = 62.8650, GNorm = 1.3799, lr_0 = 1.5761e-04
Loss = 3.6243e-01, PNorm = 62.8652, GNorm = 1.2802, lr_0 = 1.5750e-04
Loss = 3.7907e-01, PNorm = 62.8679, GNorm = 1.3970, lr_0 = 1.5740e-04
Loss = 3.6249e-01, PNorm = 62.8702, GNorm = 1.4038, lr_0 = 1.5729e-04
Loss = 2.9954e-01, PNorm = 62.8722, GNorm = 1.2078, lr_0 = 1.5718e-04
Loss = 3.3682e-01, PNorm = 62.8736, GNorm = 1.2729, lr_0 = 1.5707e-04
Loss = 3.6181e-01, PNorm = 62.8752, GNorm = 1.3295, lr_0 = 1.5697e-04
Loss = 3.3933e-01, PNorm = 62.8757, GNorm = 1.3154, lr_0 = 1.5686e-04
Loss = 4.0630e-01, PNorm = 62.8771, GNorm = 1.4573, lr_0 = 1.5675e-04
Loss = 3.5153e-01, PNorm = 62.8802, GNorm = 1.2640, lr_0 = 1.5664e-04
Loss = 3.1948e-01, PNorm = 62.8837, GNorm = 1.5026, lr_0 = 1.5654e-04
Loss = 3.6788e-01, PNorm = 62.8833, GNorm = 3.3630, lr_0 = 1.5643e-04
Loss = 3.4307e-01, PNorm = 62.8845, GNorm = 1.1044, lr_0 = 1.5632e-04
Loss = 3.5857e-01, PNorm = 62.8891, GNorm = 1.7554, lr_0 = 1.5621e-04
Loss = 3.6421e-01, PNorm = 62.8931, GNorm = 1.3695, lr_0 = 1.5611e-04
Loss = 3.9881e-01, PNorm = 62.8920, GNorm = 2.1458, lr_0 = 1.5600e-04
Loss = 3.6035e-01, PNorm = 62.8923, GNorm = 2.1938, lr_0 = 1.5589e-04
Loss = 3.6889e-01, PNorm = 62.8905, GNorm = 1.9114, lr_0 = 1.5579e-04
Loss = 3.5663e-01, PNorm = 62.8897, GNorm = 1.2540, lr_0 = 1.5568e-04
Loss = 3.2430e-01, PNorm = 62.8895, GNorm = 1.4113, lr_0 = 1.5557e-04
Loss = 3.4022e-01, PNorm = 62.8914, GNorm = 1.6477, lr_0 = 1.5547e-04
Loss = 4.0976e-01, PNorm = 62.8903, GNorm = 1.4466, lr_0 = 1.5536e-04
Loss = 3.2248e-01, PNorm = 62.8921, GNorm = 1.1716, lr_0 = 1.5525e-04
Loss = 3.4873e-01, PNorm = 62.8952, GNorm = 1.2848, lr_0 = 1.5515e-04
Loss = 3.5966e-01, PNorm = 62.8969, GNorm = 1.2175, lr_0 = 1.5504e-04
Loss = 4.1107e-01, PNorm = 62.8974, GNorm = 1.6926, lr_0 = 1.5493e-04
Loss = 3.4475e-01, PNorm = 62.8980, GNorm = 1.1903, lr_0 = 1.5483e-04
Loss = 3.4886e-01, PNorm = 62.8980, GNorm = 1.5444, lr_0 = 1.5472e-04
Loss = 3.3561e-01, PNorm = 62.9013, GNorm = 1.1480, lr_0 = 1.5462e-04
Loss = 3.8042e-01, PNorm = 62.9045, GNorm = 1.1717, lr_0 = 1.5451e-04
Loss = 3.4415e-01, PNorm = 62.9054, GNorm = 1.2943, lr_0 = 1.5440e-04
Loss = 3.6609e-01, PNorm = 62.9046, GNorm = 1.8728, lr_0 = 1.5430e-04
Loss = 2.8979e-01, PNorm = 62.9069, GNorm = 1.4577, lr_0 = 1.5419e-04
Loss = 4.3803e-01, PNorm = 62.9078, GNorm = 1.9866, lr_0 = 1.5409e-04
Loss = 3.2588e-01, PNorm = 62.9089, GNorm = 1.3702, lr_0 = 1.5398e-04
Loss = 3.6764e-01, PNorm = 62.9080, GNorm = 1.3264, lr_0 = 1.5388e-04
Loss = 3.1630e-01, PNorm = 62.9069, GNorm = 1.8610, lr_0 = 1.5377e-04
Loss = 3.8908e-01, PNorm = 62.9090, GNorm = 1.9149, lr_0 = 1.5367e-04
Loss = 3.5430e-01, PNorm = 62.9096, GNorm = 2.0007, lr_0 = 1.5356e-04
Loss = 3.5683e-01, PNorm = 62.9094, GNorm = 1.4924, lr_0 = 1.5346e-04
Loss = 3.8200e-01, PNorm = 62.9096, GNorm = 1.4570, lr_0 = 1.5335e-04
Loss = 3.8845e-01, PNorm = 62.9138, GNorm = 1.2597, lr_0 = 1.5325e-04
Loss = 3.5468e-01, PNorm = 62.9162, GNorm = 2.0978, lr_0 = 1.5314e-04
Loss = 3.4156e-01, PNorm = 62.9181, GNorm = 1.5347, lr_0 = 1.5304e-04
Loss = 3.7602e-01, PNorm = 62.9192, GNorm = 1.6126, lr_0 = 1.5293e-04
Loss = 3.5583e-01, PNorm = 62.9207, GNorm = 1.7498, lr_0 = 1.5283e-04
Loss = 3.3210e-01, PNorm = 62.9239, GNorm = 1.0246, lr_0 = 1.5272e-04
Loss = 3.5153e-01, PNorm = 62.9267, GNorm = 2.1536, lr_0 = 1.5262e-04
Loss = 3.4313e-01, PNorm = 62.9274, GNorm = 1.5002, lr_0 = 1.5251e-04
Loss = 3.4623e-01, PNorm = 62.9282, GNorm = 1.2576, lr_0 = 1.5241e-04
Loss = 3.8700e-01, PNorm = 62.9275, GNorm = 1.5696, lr_0 = 1.5230e-04
Loss = 3.7264e-01, PNorm = 62.9290, GNorm = 1.4454, lr_0 = 1.5220e-04
Loss = 3.1703e-01, PNorm = 62.9312, GNorm = 1.2675, lr_0 = 1.5209e-04
Loss = 3.6447e-01, PNorm = 62.9314, GNorm = 1.8877, lr_0 = 1.5199e-04
Loss = 3.4680e-01, PNorm = 62.9320, GNorm = 1.4778, lr_0 = 1.5189e-04
Loss = 3.7156e-01, PNorm = 62.9335, GNorm = 1.7365, lr_0 = 1.5178e-04
Loss = 3.4507e-01, PNorm = 62.9348, GNorm = 1.3502, lr_0 = 1.5168e-04
Loss = 3.6295e-01, PNorm = 62.9364, GNorm = 1.2894, lr_0 = 1.5157e-04
Loss = 3.5880e-01, PNorm = 62.9384, GNorm = 1.5545, lr_0 = 1.5147e-04
Loss = 3.3266e-01, PNorm = 62.9363, GNorm = 1.2960, lr_0 = 1.5137e-04
Loss = 4.1128e-01, PNorm = 62.9334, GNorm = 1.7357, lr_0 = 1.5126e-04
Loss = 4.2058e-01, PNorm = 62.9361, GNorm = 1.2648, lr_0 = 1.5116e-04
Loss = 3.3532e-01, PNorm = 62.9414, GNorm = 1.6443, lr_0 = 1.5106e-04
Loss = 3.3857e-01, PNorm = 62.9448, GNorm = 1.3770, lr_0 = 1.5095e-04
Loss = 3.1760e-01, PNorm = 62.9446, GNorm = 1.3470, lr_0 = 1.5085e-04
Validation mae = 0.111071
Epoch 25
Loss = 3.8406e-01, PNorm = 62.9449, GNorm = 2.4150, lr_0 = 1.5075e-04
Loss = 3.3956e-01, PNorm = 62.9477, GNorm = 3.4116, lr_0 = 1.5064e-04
Loss = 3.8135e-01, PNorm = 62.9489, GNorm = 2.0290, lr_0 = 1.5054e-04
Loss = 3.3777e-01, PNorm = 62.9527, GNorm = 1.4498, lr_0 = 1.5044e-04
Loss = 2.9965e-01, PNorm = 62.9525, GNorm = 1.4450, lr_0 = 1.5033e-04
Loss = 3.3468e-01, PNorm = 62.9535, GNorm = 1.1665, lr_0 = 1.5023e-04
Loss = 3.1272e-01, PNorm = 62.9564, GNorm = 1.2619, lr_0 = 1.5013e-04
Loss = 4.1083e-01, PNorm = 62.9560, GNorm = 1.5458, lr_0 = 1.5002e-04
Loss = 3.5360e-01, PNorm = 62.9566, GNorm = 1.0928, lr_0 = 1.4992e-04
Loss = 3.8082e-01, PNorm = 62.9589, GNorm = 1.6773, lr_0 = 1.4982e-04
Loss = 3.0529e-01, PNorm = 62.9602, GNorm = 1.4807, lr_0 = 1.4972e-04
Loss = 3.5925e-01, PNorm = 62.9631, GNorm = 0.8529, lr_0 = 1.4961e-04
Loss = 3.5501e-01, PNorm = 62.9643, GNorm = 1.1511, lr_0 = 1.4951e-04
Loss = 3.4694e-01, PNorm = 62.9655, GNorm = 1.0538, lr_0 = 1.4941e-04
Loss = 3.5434e-01, PNorm = 62.9671, GNorm = 1.4114, lr_0 = 1.4931e-04
Loss = 3.4443e-01, PNorm = 62.9685, GNorm = 1.6312, lr_0 = 1.4920e-04
Loss = 4.3385e-01, PNorm = 62.9702, GNorm = 1.5752, lr_0 = 1.4910e-04
Loss = 3.6227e-01, PNorm = 62.9721, GNorm = 1.2916, lr_0 = 1.4900e-04
Loss = 3.4548e-01, PNorm = 62.9715, GNorm = 1.6483, lr_0 = 1.4890e-04
Loss = 3.3819e-01, PNorm = 62.9716, GNorm = 1.5441, lr_0 = 1.4880e-04
Loss = 3.6931e-01, PNorm = 62.9715, GNorm = 1.4867, lr_0 = 1.4869e-04
Loss = 3.4511e-01, PNorm = 62.9727, GNorm = 1.3736, lr_0 = 1.4859e-04
Loss = 3.4954e-01, PNorm = 62.9760, GNorm = 1.3442, lr_0 = 1.4849e-04
Loss = 3.6646e-01, PNorm = 62.9774, GNorm = 1.9872, lr_0 = 1.4839e-04
Loss = 2.9910e-01, PNorm = 62.9775, GNorm = 1.0294, lr_0 = 1.4829e-04
Loss = 4.2645e-01, PNorm = 62.9775, GNorm = 1.4493, lr_0 = 1.4818e-04
Loss = 2.8528e-01, PNorm = 62.9792, GNorm = 1.3448, lr_0 = 1.4808e-04
Loss = 3.5185e-01, PNorm = 62.9804, GNorm = 1.4434, lr_0 = 1.4798e-04
Loss = 3.3749e-01, PNorm = 62.9825, GNorm = 1.6289, lr_0 = 1.4788e-04
Loss = 3.2633e-01, PNorm = 62.9856, GNorm = 1.3702, lr_0 = 1.4778e-04
Loss = 3.3761e-01, PNorm = 62.9871, GNorm = 1.2646, lr_0 = 1.4768e-04
Loss = 2.8484e-01, PNorm = 62.9904, GNorm = 1.3567, lr_0 = 1.4758e-04
Loss = 3.8308e-01, PNorm = 62.9901, GNorm = 2.6738, lr_0 = 1.4748e-04
Loss = 3.3387e-01, PNorm = 62.9911, GNorm = 1.3933, lr_0 = 1.4737e-04
Loss = 3.2941e-01, PNorm = 62.9922, GNorm = 2.3678, lr_0 = 1.4727e-04
Loss = 3.5313e-01, PNorm = 62.9935, GNorm = 1.4614, lr_0 = 1.4717e-04
Loss = 3.4340e-01, PNorm = 62.9945, GNorm = 1.7491, lr_0 = 1.4707e-04
Loss = 3.3852e-01, PNorm = 62.9927, GNorm = 2.7945, lr_0 = 1.4697e-04
Loss = 3.4551e-01, PNorm = 62.9942, GNorm = 1.3047, lr_0 = 1.4687e-04
Loss = 3.4753e-01, PNorm = 62.9959, GNorm = 1.3457, lr_0 = 1.4677e-04
Loss = 3.5205e-01, PNorm = 62.9980, GNorm = 1.4035, lr_0 = 1.4667e-04
Loss = 3.3149e-01, PNorm = 62.9997, GNorm = 1.7316, lr_0 = 1.4657e-04
Loss = 3.2620e-01, PNorm = 62.9998, GNorm = 1.9388, lr_0 = 1.4647e-04
Loss = 3.8389e-01, PNorm = 63.0001, GNorm = 1.2043, lr_0 = 1.4637e-04
Loss = 3.7864e-01, PNorm = 63.0038, GNorm = 1.6867, lr_0 = 1.4627e-04
Loss = 3.1428e-01, PNorm = 63.0070, GNorm = 1.2347, lr_0 = 1.4617e-04
Loss = 3.2403e-01, PNorm = 63.0082, GNorm = 1.5595, lr_0 = 1.4607e-04
Loss = 3.5551e-01, PNorm = 63.0093, GNorm = 1.6996, lr_0 = 1.4597e-04
Loss = 3.7708e-01, PNorm = 63.0111, GNorm = 1.5708, lr_0 = 1.4587e-04
Loss = 3.7697e-01, PNorm = 63.0150, GNorm = 1.1623, lr_0 = 1.4577e-04
Loss = 3.5207e-01, PNorm = 63.0171, GNorm = 1.6269, lr_0 = 1.4567e-04
Loss = 3.6278e-01, PNorm = 63.0169, GNorm = 2.3043, lr_0 = 1.4557e-04
Loss = 4.2595e-01, PNorm = 63.0166, GNorm = 1.9449, lr_0 = 1.4547e-04
Loss = 3.2793e-01, PNorm = 63.0178, GNorm = 1.2062, lr_0 = 1.4537e-04
Loss = 3.6852e-01, PNorm = 63.0185, GNorm = 2.1192, lr_0 = 1.4527e-04
Loss = 3.3217e-01, PNorm = 63.0181, GNorm = 1.4895, lr_0 = 1.4517e-04
Loss = 3.3052e-01, PNorm = 63.0208, GNorm = 1.8957, lr_0 = 1.4507e-04
Loss = 2.8306e-01, PNorm = 63.0230, GNorm = 1.8453, lr_0 = 1.4497e-04
Loss = 3.4571e-01, PNorm = 63.0234, GNorm = 1.4324, lr_0 = 1.4487e-04
Loss = 3.2312e-01, PNorm = 63.0252, GNorm = 1.3997, lr_0 = 1.4477e-04
Loss = 3.3883e-01, PNorm = 63.0288, GNorm = 1.5502, lr_0 = 1.4467e-04
Loss = 3.2071e-01, PNorm = 63.0282, GNorm = 1.3810, lr_0 = 1.4457e-04
Loss = 3.8414e-01, PNorm = 63.0287, GNorm = 1.3047, lr_0 = 1.4447e-04
Loss = 3.6272e-01, PNorm = 63.0311, GNorm = 1.3862, lr_0 = 1.4438e-04
Loss = 3.2794e-01, PNorm = 63.0319, GNorm = 1.3304, lr_0 = 1.4428e-04
Loss = 3.5362e-01, PNorm = 63.0305, GNorm = 1.0668, lr_0 = 1.4418e-04
Loss = 3.3819e-01, PNorm = 63.0304, GNorm = 1.4813, lr_0 = 1.4408e-04
Loss = 3.6509e-01, PNorm = 63.0322, GNorm = 0.9263, lr_0 = 1.4398e-04
Loss = 3.4789e-01, PNorm = 63.0341, GNorm = 1.5208, lr_0 = 1.4388e-04
Loss = 3.3746e-01, PNorm = 63.0348, GNorm = 1.0226, lr_0 = 1.4378e-04
Loss = 3.3119e-01, PNorm = 63.0378, GNorm = 2.2806, lr_0 = 1.4368e-04
Loss = 3.6290e-01, PNorm = 63.0392, GNorm = 1.2731, lr_0 = 1.4359e-04
Loss = 3.2907e-01, PNorm = 63.0412, GNorm = 2.1506, lr_0 = 1.4349e-04
Loss = 3.6145e-01, PNorm = 63.0412, GNorm = 1.3936, lr_0 = 1.4339e-04
Loss = 3.2763e-01, PNorm = 63.0415, GNorm = 1.7913, lr_0 = 1.4329e-04
Loss = 3.9146e-01, PNorm = 63.0407, GNorm = 1.7128, lr_0 = 1.4319e-04
Loss = 3.4084e-01, PNorm = 63.0405, GNorm = 1.0975, lr_0 = 1.4310e-04
Loss = 3.0904e-01, PNorm = 63.0407, GNorm = 1.8292, lr_0 = 1.4300e-04
Loss = 3.5865e-01, PNorm = 63.0425, GNorm = 1.6103, lr_0 = 1.4290e-04
Loss = 3.4973e-01, PNorm = 63.0458, GNorm = 0.9938, lr_0 = 1.4280e-04
Loss = 3.3815e-01, PNorm = 63.0462, GNorm = 1.4161, lr_0 = 1.4270e-04
Loss = 3.5279e-01, PNorm = 63.0453, GNorm = 1.4142, lr_0 = 1.4261e-04
Loss = 3.5737e-01, PNorm = 63.0494, GNorm = 2.1935, lr_0 = 1.4251e-04
Loss = 3.2119e-01, PNorm = 63.0512, GNorm = 1.3164, lr_0 = 1.4241e-04
Loss = 3.5357e-01, PNorm = 63.0508, GNorm = 1.9763, lr_0 = 1.4231e-04
Loss = 3.5355e-01, PNorm = 63.0508, GNorm = 1.5429, lr_0 = 1.4222e-04
Loss = 3.6441e-01, PNorm = 63.0494, GNorm = 1.2770, lr_0 = 1.4212e-04
Loss = 3.8655e-01, PNorm = 63.0504, GNorm = 1.4135, lr_0 = 1.4202e-04
Loss = 4.3061e-01, PNorm = 63.0544, GNorm = 1.5002, lr_0 = 1.4192e-04
Loss = 3.8656e-01, PNorm = 63.0571, GNorm = 1.3770, lr_0 = 1.4183e-04
Loss = 3.7368e-01, PNorm = 63.0611, GNorm = 2.2980, lr_0 = 1.4173e-04
Loss = 2.8771e-01, PNorm = 63.0633, GNorm = 1.0464, lr_0 = 1.4163e-04
Loss = 3.2196e-01, PNorm = 63.0630, GNorm = 1.1892, lr_0 = 1.4153e-04
Loss = 3.6990e-01, PNorm = 63.0624, GNorm = 1.0058, lr_0 = 1.4144e-04
Loss = 3.4174e-01, PNorm = 63.0638, GNorm = 1.7451, lr_0 = 1.4134e-04
Loss = 3.3081e-01, PNorm = 63.0667, GNorm = 1.3266, lr_0 = 1.4124e-04
Loss = 4.0049e-01, PNorm = 63.0666, GNorm = 1.7394, lr_0 = 1.4115e-04
Loss = 3.6847e-01, PNorm = 63.0682, GNorm = 0.9647, lr_0 = 1.4105e-04
Loss = 3.3918e-01, PNorm = 63.0688, GNorm = 2.6980, lr_0 = 1.4095e-04
Loss = 3.2714e-01, PNorm = 63.0693, GNorm = 1.4147, lr_0 = 1.4086e-04
Loss = 3.9944e-01, PNorm = 63.0693, GNorm = 1.7741, lr_0 = 1.4076e-04
Loss = 3.7538e-01, PNorm = 63.0716, GNorm = 1.5407, lr_0 = 1.4066e-04
Loss = 4.0560e-01, PNorm = 63.0727, GNorm = 1.7759, lr_0 = 1.4057e-04
Loss = 3.3404e-01, PNorm = 63.0727, GNorm = 1.3128, lr_0 = 1.4047e-04
Loss = 3.0617e-01, PNorm = 63.0732, GNorm = 1.1939, lr_0 = 1.4038e-04
Loss = 3.4890e-01, PNorm = 63.0742, GNorm = 1.7431, lr_0 = 1.4028e-04
Loss = 3.4864e-01, PNorm = 63.0763, GNorm = 1.6257, lr_0 = 1.4018e-04
Loss = 3.0214e-01, PNorm = 63.0773, GNorm = 1.3350, lr_0 = 1.4009e-04
Loss = 3.7858e-01, PNorm = 63.0773, GNorm = 1.2963, lr_0 = 1.3999e-04
Loss = 3.7581e-01, PNorm = 63.0786, GNorm = 1.0821, lr_0 = 1.3990e-04
Loss = 3.4438e-01, PNorm = 63.0807, GNorm = 1.4561, lr_0 = 1.3980e-04
Loss = 3.3204e-01, PNorm = 63.0816, GNorm = 1.0611, lr_0 = 1.3970e-04
Loss = 3.7880e-01, PNorm = 63.0805, GNorm = 1.4269, lr_0 = 1.3961e-04
Loss = 3.3134e-01, PNorm = 63.0829, GNorm = 1.3397, lr_0 = 1.3951e-04
Loss = 3.5306e-01, PNorm = 63.0847, GNorm = 1.2839, lr_0 = 1.3942e-04
Loss = 3.4730e-01, PNorm = 63.0848, GNorm = 1.5221, lr_0 = 1.3932e-04
Loss = 3.8747e-01, PNorm = 63.0854, GNorm = 1.6052, lr_0 = 1.3923e-04
Loss = 3.5090e-01, PNorm = 63.0883, GNorm = 1.4770, lr_0 = 1.3913e-04
Loss = 3.2233e-01, PNorm = 63.0901, GNorm = 1.7047, lr_0 = 1.3904e-04
Loss = 3.4180e-01, PNorm = 63.0885, GNorm = 1.0377, lr_0 = 1.3894e-04
Validation mae = 0.110871
Epoch 26
Loss = 3.7293e-01, PNorm = 63.0891, GNorm = 1.3141, lr_0 = 1.3884e-04
Loss = 3.7096e-01, PNorm = 63.0913, GNorm = 1.1756, lr_0 = 1.3875e-04
Loss = 3.5206e-01, PNorm = 63.0911, GNorm = 1.7951, lr_0 = 1.3865e-04
Loss = 2.9305e-01, PNorm = 63.0931, GNorm = 1.2920, lr_0 = 1.3856e-04
Loss = 3.4899e-01, PNorm = 63.0937, GNorm = 1.0850, lr_0 = 1.3846e-04
Loss = 3.0435e-01, PNorm = 63.0947, GNorm = 1.4392, lr_0 = 1.3837e-04
Loss = 3.4683e-01, PNorm = 63.0968, GNorm = 1.5209, lr_0 = 1.3828e-04
Loss = 3.7913e-01, PNorm = 63.0985, GNorm = 1.3884, lr_0 = 1.3818e-04
Loss = 3.1828e-01, PNorm = 63.1010, GNorm = 1.1438, lr_0 = 1.3809e-04
Loss = 3.3308e-01, PNorm = 63.1041, GNorm = 1.3508, lr_0 = 1.3799e-04
Loss = 3.3350e-01, PNorm = 63.1054, GNorm = 1.3451, lr_0 = 1.3790e-04
Loss = 3.4341e-01, PNorm = 63.1066, GNorm = 1.6492, lr_0 = 1.3780e-04
Loss = 3.9044e-01, PNorm = 63.1075, GNorm = 1.8937, lr_0 = 1.3771e-04
Loss = 3.7331e-01, PNorm = 63.1096, GNorm = 1.2962, lr_0 = 1.3761e-04
Loss = 3.7090e-01, PNorm = 63.1098, GNorm = 1.6625, lr_0 = 1.3752e-04
Loss = 3.2390e-01, PNorm = 63.1093, GNorm = 1.3794, lr_0 = 1.3742e-04
Loss = 3.2444e-01, PNorm = 63.1110, GNorm = 1.2362, lr_0 = 1.3733e-04
Loss = 3.7034e-01, PNorm = 63.1131, GNorm = 1.5811, lr_0 = 1.3724e-04
Loss = 3.8449e-01, PNorm = 63.1151, GNorm = 1.2991, lr_0 = 1.3714e-04
Loss = 3.6738e-01, PNorm = 63.1161, GNorm = 1.3170, lr_0 = 1.3705e-04
Loss = 3.7836e-01, PNorm = 63.1171, GNorm = 1.6165, lr_0 = 1.3695e-04
Loss = 3.4484e-01, PNorm = 63.1177, GNorm = 1.6975, lr_0 = 1.3686e-04
Loss = 3.4743e-01, PNorm = 63.1199, GNorm = 1.8982, lr_0 = 1.3677e-04
Loss = 3.4745e-01, PNorm = 63.1228, GNorm = 1.6625, lr_0 = 1.3667e-04
Loss = 3.6948e-01, PNorm = 63.1240, GNorm = 2.2587, lr_0 = 1.3658e-04
Loss = 3.4747e-01, PNorm = 63.1259, GNorm = 1.3657, lr_0 = 1.3649e-04
Loss = 3.4127e-01, PNorm = 63.1304, GNorm = 1.3133, lr_0 = 1.3639e-04
Loss = 3.4852e-01, PNorm = 63.1306, GNorm = 1.5860, lr_0 = 1.3630e-04
Loss = 3.6069e-01, PNorm = 63.1305, GNorm = 1.8828, lr_0 = 1.3621e-04
Loss = 3.3918e-01, PNorm = 63.1331, GNorm = 1.3986, lr_0 = 1.3611e-04
Loss = 3.2924e-01, PNorm = 63.1347, GNorm = 2.1455, lr_0 = 1.3602e-04
Loss = 3.1294e-01, PNorm = 63.1327, GNorm = 2.3740, lr_0 = 1.3593e-04
Loss = 4.1285e-01, PNorm = 63.1338, GNorm = 1.3080, lr_0 = 1.3583e-04
Loss = 3.6110e-01, PNorm = 63.1355, GNorm = 1.7436, lr_0 = 1.3574e-04
Loss = 3.5927e-01, PNorm = 63.1388, GNorm = 1.5968, lr_0 = 1.3565e-04
Loss = 3.2662e-01, PNorm = 63.1399, GNorm = 1.2939, lr_0 = 1.3555e-04
Loss = 3.1470e-01, PNorm = 63.1410, GNorm = 1.7647, lr_0 = 1.3546e-04
Loss = 3.1303e-01, PNorm = 63.1415, GNorm = 1.1662, lr_0 = 1.3537e-04
Loss = 2.8475e-01, PNorm = 63.1418, GNorm = 1.3147, lr_0 = 1.3528e-04
Loss = 3.5121e-01, PNorm = 63.1413, GNorm = 2.1920, lr_0 = 1.3518e-04
Loss = 3.1004e-01, PNorm = 63.1409, GNorm = 2.0283, lr_0 = 1.3509e-04
Loss = 4.0100e-01, PNorm = 63.1437, GNorm = 1.4199, lr_0 = 1.3500e-04
Loss = 3.7242e-01, PNorm = 63.1443, GNorm = 1.4559, lr_0 = 1.3491e-04
Loss = 3.3404e-01, PNorm = 63.1453, GNorm = 1.7913, lr_0 = 1.3481e-04
Loss = 3.7018e-01, PNorm = 63.1463, GNorm = 1.5507, lr_0 = 1.3472e-04
Loss = 3.5495e-01, PNorm = 63.1494, GNorm = 1.9263, lr_0 = 1.3463e-04
Loss = 3.6824e-01, PNorm = 63.1502, GNorm = 2.2263, lr_0 = 1.3454e-04
Loss = 3.4518e-01, PNorm = 63.1514, GNorm = 1.2427, lr_0 = 1.3444e-04
Loss = 3.7956e-01, PNorm = 63.1540, GNorm = 1.6529, lr_0 = 1.3435e-04
Loss = 3.7892e-01, PNorm = 63.1530, GNorm = 1.3287, lr_0 = 1.3426e-04
Loss = 3.5289e-01, PNorm = 63.1545, GNorm = 2.1517, lr_0 = 1.3417e-04
Loss = 3.4608e-01, PNorm = 63.1588, GNorm = 1.3646, lr_0 = 1.3408e-04
Loss = 2.9628e-01, PNorm = 63.1610, GNorm = 2.0737, lr_0 = 1.3398e-04
Loss = 3.8921e-01, PNorm = 63.1601, GNorm = 1.5322, lr_0 = 1.3389e-04
Loss = 3.6241e-01, PNorm = 63.1613, GNorm = 1.8635, lr_0 = 1.3380e-04
Loss = 2.9809e-01, PNorm = 63.1638, GNorm = 1.4429, lr_0 = 1.3371e-04
Loss = 3.3675e-01, PNorm = 63.1648, GNorm = 1.4329, lr_0 = 1.3362e-04
Loss = 3.6021e-01, PNorm = 63.1649, GNorm = 1.8597, lr_0 = 1.3353e-04
Loss = 3.3543e-01, PNorm = 63.1661, GNorm = 1.7934, lr_0 = 1.3343e-04
Loss = 3.1947e-01, PNorm = 63.1693, GNorm = 1.5628, lr_0 = 1.3334e-04
Loss = 3.8743e-01, PNorm = 63.1692, GNorm = 1.4056, lr_0 = 1.3325e-04
Loss = 3.4505e-01, PNorm = 63.1701, GNorm = 1.4899, lr_0 = 1.3316e-04
Loss = 3.7528e-01, PNorm = 63.1721, GNorm = 1.5553, lr_0 = 1.3307e-04
Loss = 3.1290e-01, PNorm = 63.1746, GNorm = 1.3995, lr_0 = 1.3298e-04
Loss = 3.9628e-01, PNorm = 63.1750, GNorm = 1.4181, lr_0 = 1.3289e-04
Loss = 3.3166e-01, PNorm = 63.1751, GNorm = 1.5745, lr_0 = 1.3280e-04
Loss = 3.4795e-01, PNorm = 63.1770, GNorm = 1.3808, lr_0 = 1.3270e-04
Loss = 3.4826e-01, PNorm = 63.1785, GNorm = 1.6344, lr_0 = 1.3261e-04
Loss = 3.9648e-01, PNorm = 63.1775, GNorm = 1.3860, lr_0 = 1.3252e-04
Loss = 3.7189e-01, PNorm = 63.1788, GNorm = 1.3426, lr_0 = 1.3243e-04
Loss = 3.4100e-01, PNorm = 63.1799, GNorm = 2.3449, lr_0 = 1.3234e-04
Loss = 3.3307e-01, PNorm = 63.1799, GNorm = 1.2826, lr_0 = 1.3225e-04
Loss = 3.1369e-01, PNorm = 63.1792, GNorm = 1.5341, lr_0 = 1.3216e-04
Loss = 3.4828e-01, PNorm = 63.1812, GNorm = 1.8417, lr_0 = 1.3207e-04
Loss = 3.7329e-01, PNorm = 63.1828, GNorm = 1.6033, lr_0 = 1.3198e-04
Loss = 3.4575e-01, PNorm = 63.1836, GNorm = 1.2550, lr_0 = 1.3189e-04
Loss = 3.2470e-01, PNorm = 63.1836, GNorm = 1.6226, lr_0 = 1.3180e-04
Loss = 2.9154e-01, PNorm = 63.1860, GNorm = 1.1619, lr_0 = 1.3171e-04
Loss = 4.2566e-01, PNorm = 63.1869, GNorm = 1.2294, lr_0 = 1.3162e-04
Loss = 3.5903e-01, PNorm = 63.1876, GNorm = 1.4150, lr_0 = 1.3153e-04
Loss = 3.3455e-01, PNorm = 63.1893, GNorm = 1.5467, lr_0 = 1.3144e-04
Loss = 3.6613e-01, PNorm = 63.1902, GNorm = 1.1961, lr_0 = 1.3135e-04
Loss = 3.4615e-01, PNorm = 63.1902, GNorm = 2.1390, lr_0 = 1.3126e-04
Loss = 3.4019e-01, PNorm = 63.1913, GNorm = 1.4934, lr_0 = 1.3117e-04
Loss = 2.8339e-01, PNorm = 63.1938, GNorm = 1.1744, lr_0 = 1.3108e-04
Loss = 3.8396e-01, PNorm = 63.1964, GNorm = 1.3247, lr_0 = 1.3099e-04
Loss = 3.3718e-01, PNorm = 63.1973, GNorm = 1.7259, lr_0 = 1.3090e-04
Loss = 3.7435e-01, PNorm = 63.1989, GNorm = 1.4413, lr_0 = 1.3081e-04
Loss = 3.2949e-01, PNorm = 63.2002, GNorm = 1.5914, lr_0 = 1.3072e-04
Loss = 3.6871e-01, PNorm = 63.2002, GNorm = 1.2561, lr_0 = 1.3063e-04
Loss = 2.8484e-01, PNorm = 63.2024, GNorm = 2.0017, lr_0 = 1.3054e-04
Loss = 3.5354e-01, PNorm = 63.2053, GNorm = 1.4651, lr_0 = 1.3045e-04
Loss = 3.0718e-01, PNorm = 63.2076, GNorm = 1.3098, lr_0 = 1.3036e-04
Loss = 3.5634e-01, PNorm = 63.2081, GNorm = 1.4518, lr_0 = 1.3027e-04
Loss = 3.3650e-01, PNorm = 63.2089, GNorm = 1.2263, lr_0 = 1.3018e-04
Loss = 3.5929e-01, PNorm = 63.2115, GNorm = 1.7114, lr_0 = 1.3009e-04
Loss = 3.1265e-01, PNorm = 63.2139, GNorm = 1.1996, lr_0 = 1.3000e-04
Loss = 3.4851e-01, PNorm = 63.2159, GNorm = 2.2338, lr_0 = 1.2992e-04
Loss = 3.5763e-01, PNorm = 63.2138, GNorm = 1.5689, lr_0 = 1.2983e-04
Loss = 3.9849e-01, PNorm = 63.2137, GNorm = 1.3564, lr_0 = 1.2974e-04
Loss = 3.5190e-01, PNorm = 63.2153, GNorm = 1.4103, lr_0 = 1.2965e-04
Loss = 3.2000e-01, PNorm = 63.2156, GNorm = 1.6540, lr_0 = 1.2956e-04
Loss = 3.5845e-01, PNorm = 63.2166, GNorm = 1.4013, lr_0 = 1.2947e-04
Loss = 3.6963e-01, PNorm = 63.2193, GNorm = 1.6352, lr_0 = 1.2938e-04
Loss = 3.1987e-01, PNorm = 63.2197, GNorm = 1.0292, lr_0 = 1.2929e-04
Loss = 3.7610e-01, PNorm = 63.2195, GNorm = 1.7978, lr_0 = 1.2921e-04
Loss = 3.0947e-01, PNorm = 63.2234, GNorm = 1.0253, lr_0 = 1.2912e-04
Loss = 3.7784e-01, PNorm = 63.2241, GNorm = 1.6167, lr_0 = 1.2903e-04
Loss = 3.4817e-01, PNorm = 63.2240, GNorm = 1.4779, lr_0 = 1.2894e-04
Loss = 3.5431e-01, PNorm = 63.2245, GNorm = 2.0532, lr_0 = 1.2885e-04
Loss = 3.3605e-01, PNorm = 63.2261, GNorm = 1.8380, lr_0 = 1.2876e-04
Loss = 3.2249e-01, PNorm = 63.2267, GNorm = 2.0175, lr_0 = 1.2867e-04
Loss = 4.1219e-01, PNorm = 63.2283, GNorm = 1.8493, lr_0 = 1.2859e-04
Loss = 3.3110e-01, PNorm = 63.2313, GNorm = 2.2862, lr_0 = 1.2850e-04
Loss = 3.3073e-01, PNorm = 63.2330, GNorm = 1.2347, lr_0 = 1.2841e-04
Loss = 3.2360e-01, PNorm = 63.2330, GNorm = 1.2330, lr_0 = 1.2832e-04
Loss = 4.1062e-01, PNorm = 63.2336, GNorm = 1.8987, lr_0 = 1.2823e-04
Loss = 3.0609e-01, PNorm = 63.2358, GNorm = 1.9295, lr_0 = 1.2815e-04
Loss = 3.4633e-01, PNorm = 63.2360, GNorm = 1.6430, lr_0 = 1.2806e-04
Loss = 3.3968e-01, PNorm = 63.2362, GNorm = 1.5128, lr_0 = 1.2797e-04
Validation mae = 0.110809
Epoch 27
Loss = 3.4120e-01, PNorm = 63.2380, GNorm = 1.4169, lr_0 = 1.2788e-04
Loss = 3.2807e-01, PNorm = 63.2397, GNorm = 1.3312, lr_0 = 1.2780e-04
Loss = 3.4302e-01, PNorm = 63.2418, GNorm = 1.2900, lr_0 = 1.2771e-04
Loss = 3.1639e-01, PNorm = 63.2420, GNorm = 1.4244, lr_0 = 1.2762e-04
Loss = 3.8117e-01, PNorm = 63.2425, GNorm = 1.6688, lr_0 = 1.2753e-04
Loss = 3.9313e-01, PNorm = 63.2446, GNorm = 1.9532, lr_0 = 1.2745e-04
Loss = 3.4154e-01, PNorm = 63.2467, GNorm = 2.2823, lr_0 = 1.2736e-04
Loss = 3.3972e-01, PNorm = 63.2470, GNorm = 1.7721, lr_0 = 1.2727e-04
Loss = 3.4935e-01, PNorm = 63.2481, GNorm = 1.2855, lr_0 = 1.2718e-04
Loss = 3.5254e-01, PNorm = 63.2503, GNorm = 1.5613, lr_0 = 1.2710e-04
Loss = 3.3872e-01, PNorm = 63.2529, GNorm = 1.4682, lr_0 = 1.2701e-04
Loss = 3.0269e-01, PNorm = 63.2554, GNorm = 2.3768, lr_0 = 1.2692e-04
Loss = 3.7328e-01, PNorm = 63.2560, GNorm = 1.8562, lr_0 = 1.2684e-04
Loss = 3.9444e-01, PNorm = 63.2580, GNorm = 1.3853, lr_0 = 1.2675e-04
Loss = 3.3221e-01, PNorm = 63.2591, GNorm = 1.3329, lr_0 = 1.2666e-04
Loss = 3.1064e-01, PNorm = 63.2592, GNorm = 1.0415, lr_0 = 1.2658e-04
Loss = 3.8873e-01, PNorm = 63.2594, GNorm = 2.6846, lr_0 = 1.2649e-04
Loss = 3.2889e-01, PNorm = 63.2578, GNorm = 1.2514, lr_0 = 1.2640e-04
Loss = 3.4092e-01, PNorm = 63.2580, GNorm = 1.3982, lr_0 = 1.2632e-04
Loss = 3.5276e-01, PNorm = 63.2587, GNorm = 1.8247, lr_0 = 1.2623e-04
Loss = 3.3626e-01, PNorm = 63.2596, GNorm = 1.5821, lr_0 = 1.2614e-04
Loss = 3.6959e-01, PNorm = 63.2607, GNorm = 1.4764, lr_0 = 1.2606e-04
Loss = 3.5424e-01, PNorm = 63.2633, GNorm = 1.2159, lr_0 = 1.2597e-04
Loss = 3.0700e-01, PNorm = 63.2653, GNorm = 1.6864, lr_0 = 1.2588e-04
Loss = 3.6387e-01, PNorm = 63.2636, GNorm = 1.2975, lr_0 = 1.2580e-04
Loss = 3.6217e-01, PNorm = 63.2642, GNorm = 1.4451, lr_0 = 1.2571e-04
Loss = 3.4718e-01, PNorm = 63.2656, GNorm = 1.7176, lr_0 = 1.2563e-04
Loss = 3.3986e-01, PNorm = 63.2653, GNorm = 1.4217, lr_0 = 1.2554e-04
Loss = 3.5975e-01, PNorm = 63.2657, GNorm = 1.5795, lr_0 = 1.2545e-04
Loss = 3.4311e-01, PNorm = 63.2678, GNorm = 1.4051, lr_0 = 1.2537e-04
Loss = 3.9508e-01, PNorm = 63.2700, GNorm = 1.8229, lr_0 = 1.2528e-04
Loss = 3.3061e-01, PNorm = 63.2702, GNorm = 1.3285, lr_0 = 1.2520e-04
Loss = 3.6834e-01, PNorm = 63.2703, GNorm = 1.7459, lr_0 = 1.2511e-04
Loss = 3.5033e-01, PNorm = 63.2704, GNorm = 1.5183, lr_0 = 1.2502e-04
Loss = 3.7409e-01, PNorm = 63.2707, GNorm = 1.9387, lr_0 = 1.2494e-04
Loss = 3.4813e-01, PNorm = 63.2730, GNorm = 1.6401, lr_0 = 1.2485e-04
Loss = 3.3889e-01, PNorm = 63.2739, GNorm = 1.0199, lr_0 = 1.2477e-04
Loss = 3.5938e-01, PNorm = 63.2748, GNorm = 1.9836, lr_0 = 1.2468e-04
Loss = 3.1588e-01, PNorm = 63.2738, GNorm = 1.3620, lr_0 = 1.2460e-04
Loss = 3.6498e-01, PNorm = 63.2747, GNorm = 1.1620, lr_0 = 1.2451e-04
Loss = 3.3820e-01, PNorm = 63.2760, GNorm = 1.5017, lr_0 = 1.2443e-04
Loss = 3.3778e-01, PNorm = 63.2770, GNorm = 1.0836, lr_0 = 1.2434e-04
Loss = 3.2340e-01, PNorm = 63.2781, GNorm = 2.7217, lr_0 = 1.2426e-04
Loss = 3.2706e-01, PNorm = 63.2792, GNorm = 1.7574, lr_0 = 1.2417e-04
Loss = 3.2065e-01, PNorm = 63.2813, GNorm = 1.2933, lr_0 = 1.2409e-04
Loss = 3.5561e-01, PNorm = 63.2825, GNorm = 1.3458, lr_0 = 1.2400e-04
Loss = 3.5169e-01, PNorm = 63.2823, GNorm = 1.7405, lr_0 = 1.2392e-04
Loss = 3.9935e-01, PNorm = 63.2827, GNorm = 2.3373, lr_0 = 1.2383e-04
Loss = 3.6196e-01, PNorm = 63.2839, GNorm = 1.2282, lr_0 = 1.2375e-04
Loss = 3.1988e-01, PNorm = 63.2847, GNorm = 1.4208, lr_0 = 1.2366e-04
Loss = 3.7909e-01, PNorm = 63.2860, GNorm = 0.9930, lr_0 = 1.2358e-04
Loss = 3.6100e-01, PNorm = 63.2864, GNorm = 1.7119, lr_0 = 1.2349e-04
Loss = 3.2761e-01, PNorm = 63.2877, GNorm = 1.5478, lr_0 = 1.2341e-04
Loss = 3.8960e-01, PNorm = 63.2886, GNorm = 1.6374, lr_0 = 1.2332e-04
Loss = 3.6519e-01, PNorm = 63.2880, GNorm = 1.5250, lr_0 = 1.2324e-04
Loss = 3.4831e-01, PNorm = 63.2889, GNorm = 1.5741, lr_0 = 1.2315e-04
Loss = 3.1660e-01, PNorm = 63.2913, GNorm = 1.1983, lr_0 = 1.2307e-04
Loss = 3.4793e-01, PNorm = 63.2918, GNorm = 2.2029, lr_0 = 1.2298e-04
Loss = 3.2601e-01, PNorm = 63.2931, GNorm = 1.2606, lr_0 = 1.2290e-04
Loss = 3.4923e-01, PNorm = 63.2952, GNorm = 1.7267, lr_0 = 1.2282e-04
Loss = 3.5402e-01, PNorm = 63.2977, GNorm = 1.5986, lr_0 = 1.2273e-04
Loss = 3.3995e-01, PNorm = 63.2990, GNorm = 2.0450, lr_0 = 1.2265e-04
Loss = 3.2429e-01, PNorm = 63.2995, GNorm = 1.1345, lr_0 = 1.2256e-04
Loss = 3.2067e-01, PNorm = 63.3019, GNorm = 1.5176, lr_0 = 1.2248e-04
Loss = 3.0494e-01, PNorm = 63.3044, GNorm = 1.3365, lr_0 = 1.2240e-04
Loss = 3.3740e-01, PNorm = 63.3037, GNorm = 1.9718, lr_0 = 1.2231e-04
Loss = 3.5918e-01, PNorm = 63.3038, GNorm = 1.9043, lr_0 = 1.2223e-04
Loss = 3.3977e-01, PNorm = 63.3058, GNorm = 1.2101, lr_0 = 1.2214e-04
Loss = 3.3288e-01, PNorm = 63.3076, GNorm = 2.0225, lr_0 = 1.2206e-04
Loss = 3.6691e-01, PNorm = 63.3078, GNorm = 1.3263, lr_0 = 1.2198e-04
Loss = 3.2830e-01, PNorm = 63.3088, GNorm = 1.3942, lr_0 = 1.2189e-04
Loss = 4.0752e-01, PNorm = 63.3103, GNorm = 1.3249, lr_0 = 1.2181e-04
Loss = 3.5377e-01, PNorm = 63.3120, GNorm = 1.6037, lr_0 = 1.2173e-04
Loss = 3.7050e-01, PNorm = 63.3119, GNorm = 1.4230, lr_0 = 1.2164e-04
Loss = 3.4349e-01, PNorm = 63.3133, GNorm = 1.2335, lr_0 = 1.2156e-04
Loss = 3.4930e-01, PNorm = 63.3155, GNorm = 1.2728, lr_0 = 1.2148e-04
Loss = 3.1903e-01, PNorm = 63.3163, GNorm = 1.4334, lr_0 = 1.2139e-04
Loss = 3.7503e-01, PNorm = 63.3156, GNorm = 1.3070, lr_0 = 1.2131e-04
Loss = 3.3689e-01, PNorm = 63.3167, GNorm = 1.5580, lr_0 = 1.2123e-04
Loss = 3.5785e-01, PNorm = 63.3186, GNorm = 2.1096, lr_0 = 1.2114e-04
Loss = 3.2045e-01, PNorm = 63.3199, GNorm = 1.6237, lr_0 = 1.2106e-04
Loss = 4.1566e-01, PNorm = 63.3218, GNorm = 1.2020, lr_0 = 1.2098e-04
Loss = 3.4227e-01, PNorm = 63.3226, GNorm = 1.9861, lr_0 = 1.2090e-04
Loss = 3.9542e-01, PNorm = 63.3230, GNorm = 1.3196, lr_0 = 1.2081e-04
Loss = 3.6960e-01, PNorm = 63.3242, GNorm = 1.8151, lr_0 = 1.2073e-04
Loss = 3.3996e-01, PNorm = 63.3272, GNorm = 1.3467, lr_0 = 1.2065e-04
Loss = 3.4777e-01, PNorm = 63.3278, GNorm = 1.2521, lr_0 = 1.2056e-04
Loss = 2.8946e-01, PNorm = 63.3281, GNorm = 2.1145, lr_0 = 1.2048e-04
Loss = 3.4046e-01, PNorm = 63.3294, GNorm = 1.3127, lr_0 = 1.2040e-04
Loss = 3.2571e-01, PNorm = 63.3293, GNorm = 1.6788, lr_0 = 1.2032e-04
Loss = 3.4937e-01, PNorm = 63.3295, GNorm = 1.3865, lr_0 = 1.2023e-04
Loss = 3.4755e-01, PNorm = 63.3303, GNorm = 1.5688, lr_0 = 1.2015e-04
Loss = 3.2539e-01, PNorm = 63.3318, GNorm = 1.1268, lr_0 = 1.2007e-04
Loss = 3.8837e-01, PNorm = 63.3322, GNorm = 1.4341, lr_0 = 1.1999e-04
Loss = 3.5641e-01, PNorm = 63.3325, GNorm = 1.5589, lr_0 = 1.1991e-04
Loss = 3.1610e-01, PNorm = 63.3323, GNorm = 1.5034, lr_0 = 1.1982e-04
Loss = 2.8908e-01, PNorm = 63.3340, GNorm = 1.6564, lr_0 = 1.1974e-04
Loss = 3.7117e-01, PNorm = 63.3351, GNorm = 1.8920, lr_0 = 1.1966e-04
Loss = 3.3549e-01, PNorm = 63.3363, GNorm = 1.5325, lr_0 = 1.1958e-04
Loss = 3.2511e-01, PNorm = 63.3371, GNorm = 1.6430, lr_0 = 1.1950e-04
Loss = 3.8121e-01, PNorm = 63.3377, GNorm = 1.4253, lr_0 = 1.1941e-04
Loss = 3.8063e-01, PNorm = 63.3378, GNorm = 1.9672, lr_0 = 1.1933e-04
Loss = 3.1541e-01, PNorm = 63.3390, GNorm = 1.5603, lr_0 = 1.1925e-04
Loss = 3.5407e-01, PNorm = 63.3399, GNorm = 1.3550, lr_0 = 1.1917e-04
Loss = 3.5070e-01, PNorm = 63.3404, GNorm = 1.4198, lr_0 = 1.1909e-04
Loss = 3.3835e-01, PNorm = 63.3419, GNorm = 1.4602, lr_0 = 1.1901e-04
Loss = 3.6043e-01, PNorm = 63.3436, GNorm = 1.4740, lr_0 = 1.1892e-04
Loss = 3.4815e-01, PNorm = 63.3431, GNorm = 1.8015, lr_0 = 1.1884e-04
Loss = 3.2429e-01, PNorm = 63.3426, GNorm = 1.2005, lr_0 = 1.1876e-04
Loss = 3.7647e-01, PNorm = 63.3430, GNorm = 1.5596, lr_0 = 1.1868e-04
Loss = 3.8446e-01, PNorm = 63.3443, GNorm = 1.6800, lr_0 = 1.1860e-04
Loss = 2.9347e-01, PNorm = 63.3457, GNorm = 1.8340, lr_0 = 1.1852e-04
Loss = 3.6503e-01, PNorm = 63.3462, GNorm = 1.4988, lr_0 = 1.1844e-04
Loss = 3.7627e-01, PNorm = 63.3465, GNorm = 1.8227, lr_0 = 1.1835e-04
Loss = 3.5618e-01, PNorm = 63.3475, GNorm = 1.5948, lr_0 = 1.1827e-04
Loss = 3.3095e-01, PNorm = 63.3481, GNorm = 1.8714, lr_0 = 1.1819e-04
Loss = 3.0422e-01, PNorm = 63.3491, GNorm = 1.3060, lr_0 = 1.1811e-04
Loss = 3.0730e-01, PNorm = 63.3493, GNorm = 1.4428, lr_0 = 1.1803e-04
Loss = 3.5174e-01, PNorm = 63.3511, GNorm = 1.8166, lr_0 = 1.1795e-04
Loss = 2.9238e-01, PNorm = 63.3512, GNorm = 1.2085, lr_0 = 1.1787e-04
Validation mae = 0.111271
Epoch 28
Loss = 3.5425e-01, PNorm = 63.3515, GNorm = 1.7767, lr_0 = 1.1779e-04
Loss = 3.0659e-01, PNorm = 63.3522, GNorm = 1.4969, lr_0 = 1.1771e-04
Loss = 3.8064e-01, PNorm = 63.3547, GNorm = 1.8907, lr_0 = 1.1763e-04
Loss = 3.5226e-01, PNorm = 63.3565, GNorm = 1.0706, lr_0 = 1.1755e-04
Loss = 3.0391e-01, PNorm = 63.3593, GNorm = 2.0377, lr_0 = 1.1747e-04
Loss = 3.3242e-01, PNorm = 63.3612, GNorm = 1.4563, lr_0 = 1.1739e-04
Loss = 3.6147e-01, PNorm = 63.3617, GNorm = 1.5132, lr_0 = 1.1730e-04
Loss = 3.3466e-01, PNorm = 63.3608, GNorm = 1.2812, lr_0 = 1.1722e-04
Loss = 3.5613e-01, PNorm = 63.3595, GNorm = 1.2419, lr_0 = 1.1714e-04
Loss = 3.4262e-01, PNorm = 63.3613, GNorm = 1.4644, lr_0 = 1.1706e-04
Loss = 3.2854e-01, PNorm = 63.3603, GNorm = 1.4372, lr_0 = 1.1698e-04
Loss = 3.3919e-01, PNorm = 63.3619, GNorm = 1.6439, lr_0 = 1.1690e-04
Loss = 3.5442e-01, PNorm = 63.3640, GNorm = 1.4352, lr_0 = 1.1682e-04
Loss = 3.3822e-01, PNorm = 63.3654, GNorm = 1.4863, lr_0 = 1.1674e-04
Loss = 3.3793e-01, PNorm = 63.3667, GNorm = 1.4363, lr_0 = 1.1666e-04
Loss = 3.3324e-01, PNorm = 63.3676, GNorm = 1.4232, lr_0 = 1.1658e-04
Loss = 2.9742e-01, PNorm = 63.3685, GNorm = 1.3482, lr_0 = 1.1650e-04
Loss = 3.4279e-01, PNorm = 63.3701, GNorm = 2.0759, lr_0 = 1.1642e-04
Loss = 3.0903e-01, PNorm = 63.3704, GNorm = 1.0433, lr_0 = 1.1634e-04
Loss = 3.8539e-01, PNorm = 63.3702, GNorm = 1.3649, lr_0 = 1.1626e-04
Loss = 3.5521e-01, PNorm = 63.3713, GNorm = 1.8528, lr_0 = 1.1618e-04
Loss = 3.5942e-01, PNorm = 63.3732, GNorm = 2.1752, lr_0 = 1.1611e-04
Loss = 4.2120e-01, PNorm = 63.3755, GNorm = 2.1012, lr_0 = 1.1603e-04
Loss = 3.7091e-01, PNorm = 63.3756, GNorm = 1.4721, lr_0 = 1.1595e-04
Loss = 3.2914e-01, PNorm = 63.3774, GNorm = 1.1027, lr_0 = 1.1587e-04
Loss = 2.8720e-01, PNorm = 63.3778, GNorm = 1.3150, lr_0 = 1.1579e-04
Loss = 3.3686e-01, PNorm = 63.3770, GNorm = 1.2975, lr_0 = 1.1571e-04
Loss = 3.3645e-01, PNorm = 63.3789, GNorm = 1.6858, lr_0 = 1.1563e-04
Loss = 3.4831e-01, PNorm = 63.3807, GNorm = 1.2690, lr_0 = 1.1555e-04
Loss = 3.4271e-01, PNorm = 63.3799, GNorm = 1.5515, lr_0 = 1.1547e-04
Loss = 3.6664e-01, PNorm = 63.3805, GNorm = 1.5384, lr_0 = 1.1539e-04
Loss = 3.7789e-01, PNorm = 63.3826, GNorm = 1.3200, lr_0 = 1.1531e-04
Loss = 3.5190e-01, PNorm = 63.3842, GNorm = 1.5794, lr_0 = 1.1523e-04
Loss = 3.4031e-01, PNorm = 63.3860, GNorm = 1.3664, lr_0 = 1.1515e-04
Loss = 3.1824e-01, PNorm = 63.3873, GNorm = 1.3157, lr_0 = 1.1508e-04
Loss = 3.1362e-01, PNorm = 63.3875, GNorm = 1.9647, lr_0 = 1.1500e-04
Loss = 2.9583e-01, PNorm = 63.3879, GNorm = 1.3005, lr_0 = 1.1492e-04
Loss = 2.9829e-01, PNorm = 63.3881, GNorm = 1.2513, lr_0 = 1.1484e-04
Loss = 4.0870e-01, PNorm = 63.3877, GNorm = 1.4607, lr_0 = 1.1476e-04
Loss = 3.5982e-01, PNorm = 63.3878, GNorm = 1.5433, lr_0 = 1.1468e-04
Loss = 4.0211e-01, PNorm = 63.3886, GNorm = 1.5002, lr_0 = 1.1460e-04
Loss = 2.9038e-01, PNorm = 63.3899, GNorm = 1.3254, lr_0 = 1.1452e-04
Loss = 3.4022e-01, PNorm = 63.3899, GNorm = 1.2541, lr_0 = 1.1445e-04
Loss = 3.1796e-01, PNorm = 63.3873, GNorm = 1.5578, lr_0 = 1.1437e-04
Loss = 3.1809e-01, PNorm = 63.3858, GNorm = 1.3568, lr_0 = 1.1429e-04
Loss = 3.6476e-01, PNorm = 63.3850, GNorm = 1.0809, lr_0 = 1.1421e-04
Loss = 2.9845e-01, PNorm = 63.3865, GNorm = 1.3417, lr_0 = 1.1413e-04
Loss = 3.0902e-01, PNorm = 63.3877, GNorm = 1.2447, lr_0 = 1.1405e-04
Loss = 3.4449e-01, PNorm = 63.3887, GNorm = 1.1730, lr_0 = 1.1398e-04
Loss = 3.1288e-01, PNorm = 63.3906, GNorm = 1.2702, lr_0 = 1.1390e-04
Loss = 3.3655e-01, PNorm = 63.3922, GNorm = 1.7314, lr_0 = 1.1382e-04
Loss = 3.1171e-01, PNorm = 63.3922, GNorm = 1.3457, lr_0 = 1.1374e-04
Loss = 3.5779e-01, PNorm = 63.3911, GNorm = 1.2906, lr_0 = 1.1366e-04
Loss = 3.5578e-01, PNorm = 63.3912, GNorm = 1.7464, lr_0 = 1.1359e-04
Loss = 3.1392e-01, PNorm = 63.3929, GNorm = 1.3051, lr_0 = 1.1351e-04
Loss = 3.2651e-01, PNorm = 63.3961, GNorm = 1.7076, lr_0 = 1.1343e-04
Loss = 3.6031e-01, PNorm = 63.3964, GNorm = 1.4006, lr_0 = 1.1335e-04
Loss = 3.1556e-01, PNorm = 63.3963, GNorm = 1.4726, lr_0 = 1.1328e-04
Loss = 3.3988e-01, PNorm = 63.3962, GNorm = 1.4950, lr_0 = 1.1320e-04
Loss = 3.4784e-01, PNorm = 63.3976, GNorm = 1.2176, lr_0 = 1.1312e-04
Loss = 3.1697e-01, PNorm = 63.3996, GNorm = 1.6712, lr_0 = 1.1304e-04
Loss = 3.2033e-01, PNorm = 63.3993, GNorm = 1.5885, lr_0 = 1.1297e-04
Loss = 4.0057e-01, PNorm = 63.3983, GNorm = 1.4349, lr_0 = 1.1289e-04
Loss = 3.2874e-01, PNorm = 63.3994, GNorm = 1.7116, lr_0 = 1.1281e-04
Loss = 3.3643e-01, PNorm = 63.4009, GNorm = 1.9374, lr_0 = 1.1273e-04
Loss = 3.2046e-01, PNorm = 63.4031, GNorm = 1.5800, lr_0 = 1.1266e-04
Loss = 3.5227e-01, PNorm = 63.4036, GNorm = 1.6032, lr_0 = 1.1258e-04
Loss = 3.3945e-01, PNorm = 63.4054, GNorm = 1.6937, lr_0 = 1.1250e-04
Loss = 3.3359e-01, PNorm = 63.4071, GNorm = 1.7945, lr_0 = 1.1243e-04
Loss = 4.2307e-01, PNorm = 63.4089, GNorm = 1.5127, lr_0 = 1.1235e-04
Loss = 3.4347e-01, PNorm = 63.4109, GNorm = 1.3643, lr_0 = 1.1227e-04
Loss = 3.2830e-01, PNorm = 63.4131, GNorm = 1.1265, lr_0 = 1.1219e-04
Loss = 4.0165e-01, PNorm = 63.4156, GNorm = 1.7200, lr_0 = 1.1212e-04
Loss = 3.0976e-01, PNorm = 63.4172, GNorm = 1.3207, lr_0 = 1.1204e-04
Loss = 3.6367e-01, PNorm = 63.4172, GNorm = 1.3060, lr_0 = 1.1196e-04
Loss = 4.0187e-01, PNorm = 63.4189, GNorm = 2.6766, lr_0 = 1.1189e-04
Loss = 3.1716e-01, PNorm = 63.4208, GNorm = 1.1950, lr_0 = 1.1181e-04
Loss = 3.6204e-01, PNorm = 63.4211, GNorm = 1.6824, lr_0 = 1.1173e-04
Loss = 3.4881e-01, PNorm = 63.4215, GNorm = 1.8548, lr_0 = 1.1166e-04
Loss = 3.5019e-01, PNorm = 63.4240, GNorm = 1.6494, lr_0 = 1.1158e-04
Loss = 3.8380e-01, PNorm = 63.4261, GNorm = 1.4670, lr_0 = 1.1150e-04
Loss = 3.9861e-01, PNorm = 63.4287, GNorm = 1.5040, lr_0 = 1.1143e-04
Loss = 3.5191e-01, PNorm = 63.4310, GNorm = 1.8196, lr_0 = 1.1135e-04
Loss = 3.4206e-01, PNorm = 63.4312, GNorm = 1.7026, lr_0 = 1.1128e-04
Loss = 3.3406e-01, PNorm = 63.4321, GNorm = 2.1512, lr_0 = 1.1120e-04
Loss = 3.2964e-01, PNorm = 63.4329, GNorm = 1.6844, lr_0 = 1.1112e-04
Loss = 3.6967e-01, PNorm = 63.4341, GNorm = 1.9031, lr_0 = 1.1105e-04
Loss = 3.2904e-01, PNorm = 63.4357, GNorm = 1.4924, lr_0 = 1.1097e-04
Loss = 3.5893e-01, PNorm = 63.4363, GNorm = 2.6564, lr_0 = 1.1089e-04
Loss = 3.2226e-01, PNorm = 63.4358, GNorm = 1.2910, lr_0 = 1.1082e-04
Loss = 3.3564e-01, PNorm = 63.4375, GNorm = 1.8949, lr_0 = 1.1074e-04
Loss = 3.3758e-01, PNorm = 63.4385, GNorm = 1.5847, lr_0 = 1.1067e-04
Loss = 3.0245e-01, PNorm = 63.4396, GNorm = 1.4496, lr_0 = 1.1059e-04
Loss = 3.4624e-01, PNorm = 63.4398, GNorm = 1.5854, lr_0 = 1.1052e-04
Loss = 3.8560e-01, PNorm = 63.4409, GNorm = 1.6637, lr_0 = 1.1044e-04
Loss = 3.6518e-01, PNorm = 63.4433, GNorm = 1.6608, lr_0 = 1.1036e-04
Loss = 3.7602e-01, PNorm = 63.4451, GNorm = 1.4273, lr_0 = 1.1029e-04
Loss = 4.1242e-01, PNorm = 63.4458, GNorm = 1.6493, lr_0 = 1.1021e-04
Loss = 3.2917e-01, PNorm = 63.4468, GNorm = 2.3337, lr_0 = 1.1014e-04
Loss = 3.7990e-01, PNorm = 63.4467, GNorm = 1.9831, lr_0 = 1.1006e-04
Loss = 3.2277e-01, PNorm = 63.4486, GNorm = 1.2198, lr_0 = 1.0999e-04
Loss = 3.0185e-01, PNorm = 63.4503, GNorm = 1.5779, lr_0 = 1.0991e-04
Loss = 3.2298e-01, PNorm = 63.4502, GNorm = 1.2591, lr_0 = 1.0984e-04
Loss = 4.1119e-01, PNorm = 63.4516, GNorm = 1.5954, lr_0 = 1.0976e-04
Loss = 3.7354e-01, PNorm = 63.4528, GNorm = 1.6785, lr_0 = 1.0969e-04
Loss = 3.2049e-01, PNorm = 63.4533, GNorm = 1.4110, lr_0 = 1.0961e-04
Loss = 3.4848e-01, PNorm = 63.4543, GNorm = 2.0141, lr_0 = 1.0954e-04
Loss = 3.8921e-01, PNorm = 63.4540, GNorm = 1.3776, lr_0 = 1.0946e-04
Loss = 3.6149e-01, PNorm = 63.4565, GNorm = 1.7966, lr_0 = 1.0939e-04
Loss = 3.1088e-01, PNorm = 63.4581, GNorm = 1.4987, lr_0 = 1.0931e-04
Loss = 3.0788e-01, PNorm = 63.4600, GNorm = 1.3549, lr_0 = 1.0924e-04
Loss = 3.6954e-01, PNorm = 63.4623, GNorm = 1.6603, lr_0 = 1.0916e-04
Loss = 3.3637e-01, PNorm = 63.4651, GNorm = 1.4670, lr_0 = 1.0909e-04
Loss = 3.1769e-01, PNorm = 63.4678, GNorm = 1.4696, lr_0 = 1.0901e-04
Loss = 3.0762e-01, PNorm = 63.4661, GNorm = 1.2345, lr_0 = 1.0894e-04
Loss = 3.6928e-01, PNorm = 63.4654, GNorm = 1.4042, lr_0 = 1.0886e-04
Loss = 3.1029e-01, PNorm = 63.4653, GNorm = 1.2576, lr_0 = 1.0879e-04
Loss = 3.5025e-01, PNorm = 63.4668, GNorm = 1.6566, lr_0 = 1.0871e-04
Loss = 3.7722e-01, PNorm = 63.4670, GNorm = 1.4135, lr_0 = 1.0864e-04
Loss = 3.0397e-01, PNorm = 63.4671, GNorm = 1.6427, lr_0 = 1.0856e-04
Validation mae = 0.110943
Epoch 29
Loss = 3.6113e-01, PNorm = 63.4682, GNorm = 2.3981, lr_0 = 1.0849e-04
Loss = 3.0248e-01, PNorm = 63.4696, GNorm = 1.4218, lr_0 = 1.0841e-04
Loss = 3.3109e-01, PNorm = 63.4698, GNorm = 1.1257, lr_0 = 1.0834e-04
Loss = 3.7885e-01, PNorm = 63.4701, GNorm = 1.7371, lr_0 = 1.0827e-04
Loss = 3.5124e-01, PNorm = 63.4697, GNorm = 1.4294, lr_0 = 1.0819e-04
Loss = 2.9690e-01, PNorm = 63.4700, GNorm = 1.3537, lr_0 = 1.0812e-04
Loss = 3.6068e-01, PNorm = 63.4708, GNorm = 2.0895, lr_0 = 1.0804e-04
Loss = 3.5295e-01, PNorm = 63.4708, GNorm = 1.3636, lr_0 = 1.0797e-04
Loss = 3.4103e-01, PNorm = 63.4716, GNorm = 1.9729, lr_0 = 1.0790e-04
Loss = 3.4731e-01, PNorm = 63.4730, GNorm = 1.6890, lr_0 = 1.0782e-04
Loss = 3.6152e-01, PNorm = 63.4755, GNorm = 1.9953, lr_0 = 1.0775e-04
Loss = 3.0780e-01, PNorm = 63.4767, GNorm = 1.5965, lr_0 = 1.0767e-04
Loss = 3.2428e-01, PNorm = 63.4771, GNorm = 1.1532, lr_0 = 1.0760e-04
Loss = 3.8260e-01, PNorm = 63.4770, GNorm = 1.6144, lr_0 = 1.0753e-04
Loss = 3.8849e-01, PNorm = 63.4772, GNorm = 1.5937, lr_0 = 1.0745e-04
Loss = 3.4135e-01, PNorm = 63.4784, GNorm = 1.7496, lr_0 = 1.0738e-04
Loss = 3.5089e-01, PNorm = 63.4793, GNorm = 1.7207, lr_0 = 1.0731e-04
Loss = 3.7250e-01, PNorm = 63.4807, GNorm = 1.3152, lr_0 = 1.0723e-04
Loss = 3.3219e-01, PNorm = 63.4816, GNorm = 1.6314, lr_0 = 1.0716e-04
Loss = 3.4950e-01, PNorm = 63.4826, GNorm = 1.4067, lr_0 = 1.0709e-04
Loss = 3.5510e-01, PNorm = 63.4819, GNorm = 1.3351, lr_0 = 1.0701e-04
Loss = 3.6759e-01, PNorm = 63.4819, GNorm = 1.6517, lr_0 = 1.0694e-04
Loss = 3.3991e-01, PNorm = 63.4834, GNorm = 1.3932, lr_0 = 1.0687e-04
Loss = 3.7128e-01, PNorm = 63.4855, GNorm = 2.0982, lr_0 = 1.0679e-04
Loss = 3.9901e-01, PNorm = 63.4876, GNorm = 2.5057, lr_0 = 1.0672e-04
Loss = 3.1585e-01, PNorm = 63.4897, GNorm = 1.3568, lr_0 = 1.0665e-04
Loss = 3.6450e-01, PNorm = 63.4903, GNorm = 1.3509, lr_0 = 1.0657e-04
Loss = 3.0461e-01, PNorm = 63.4920, GNorm = 1.2784, lr_0 = 1.0650e-04
Loss = 3.3047e-01, PNorm = 63.4924, GNorm = 1.5834, lr_0 = 1.0643e-04
Loss = 3.3044e-01, PNorm = 63.4929, GNorm = 1.6330, lr_0 = 1.0635e-04
Loss = 3.3354e-01, PNorm = 63.4929, GNorm = 1.9591, lr_0 = 1.0628e-04
Loss = 3.8289e-01, PNorm = 63.4936, GNorm = 1.6983, lr_0 = 1.0621e-04
Loss = 3.5115e-01, PNorm = 63.4946, GNorm = 1.9370, lr_0 = 1.0614e-04
Loss = 3.2142e-01, PNorm = 63.4948, GNorm = 1.3164, lr_0 = 1.0606e-04
Loss = 3.6785e-01, PNorm = 63.4953, GNorm = 1.1703, lr_0 = 1.0599e-04
Loss = 3.6925e-01, PNorm = 63.4956, GNorm = 1.0847, lr_0 = 1.0592e-04
Loss = 3.2540e-01, PNorm = 63.4971, GNorm = 1.1863, lr_0 = 1.0585e-04
Loss = 3.5124e-01, PNorm = 63.4988, GNorm = 1.5667, lr_0 = 1.0577e-04
Loss = 3.2255e-01, PNorm = 63.4993, GNorm = 1.2601, lr_0 = 1.0570e-04
Loss = 3.4898e-01, PNorm = 63.5011, GNorm = 1.3961, lr_0 = 1.0563e-04
Loss = 3.0256e-01, PNorm = 63.5035, GNorm = 1.5068, lr_0 = 1.0556e-04
Loss = 3.4901e-01, PNorm = 63.5047, GNorm = 1.4192, lr_0 = 1.0548e-04
Loss = 3.9259e-01, PNorm = 63.5057, GNorm = 1.3148, lr_0 = 1.0541e-04
Loss = 3.1545e-01, PNorm = 63.5066, GNorm = 1.9830, lr_0 = 1.0534e-04
Loss = 3.4233e-01, PNorm = 63.5063, GNorm = 1.2832, lr_0 = 1.0527e-04
Loss = 3.6502e-01, PNorm = 63.5063, GNorm = 1.2684, lr_0 = 1.0519e-04
Loss = 2.9215e-01, PNorm = 63.5081, GNorm = 0.9396, lr_0 = 1.0512e-04
Loss = 3.6134e-01, PNorm = 63.5101, GNorm = 1.9541, lr_0 = 1.0505e-04
Loss = 2.9838e-01, PNorm = 63.5105, GNorm = 1.6765, lr_0 = 1.0498e-04
Loss = 3.4283e-01, PNorm = 63.5109, GNorm = 1.3374, lr_0 = 1.0491e-04
Loss = 3.2515e-01, PNorm = 63.5113, GNorm = 1.7971, lr_0 = 1.0483e-04
Loss = 3.4197e-01, PNorm = 63.5113, GNorm = 1.2140, lr_0 = 1.0476e-04
Loss = 3.4939e-01, PNorm = 63.5124, GNorm = 1.6447, lr_0 = 1.0469e-04
Loss = 3.3945e-01, PNorm = 63.5127, GNorm = 1.8742, lr_0 = 1.0462e-04
Loss = 3.3169e-01, PNorm = 63.5110, GNorm = 1.6804, lr_0 = 1.0455e-04
Loss = 3.4591e-01, PNorm = 63.5130, GNorm = 1.1073, lr_0 = 1.0448e-04
Loss = 2.7268e-01, PNorm = 63.5141, GNorm = 1.3754, lr_0 = 1.0440e-04
Loss = 3.1843e-01, PNorm = 63.5152, GNorm = 1.0716, lr_0 = 1.0433e-04
Loss = 3.5258e-01, PNorm = 63.5144, GNorm = 1.2696, lr_0 = 1.0426e-04
Loss = 3.4025e-01, PNorm = 63.5158, GNorm = 1.5128, lr_0 = 1.0419e-04
Loss = 3.4863e-01, PNorm = 63.5182, GNorm = 1.4094, lr_0 = 1.0412e-04
Loss = 3.4846e-01, PNorm = 63.5191, GNorm = 1.1555, lr_0 = 1.0405e-04
Loss = 3.3640e-01, PNorm = 63.5197, GNorm = 1.3971, lr_0 = 1.0398e-04
Loss = 3.1914e-01, PNorm = 63.5211, GNorm = 1.8911, lr_0 = 1.0391e-04
Loss = 3.4081e-01, PNorm = 63.5234, GNorm = 1.7775, lr_0 = 1.0383e-04
Loss = 3.3939e-01, PNorm = 63.5246, GNorm = 1.4007, lr_0 = 1.0376e-04
Loss = 2.8872e-01, PNorm = 63.5250, GNorm = 1.2077, lr_0 = 1.0369e-04
Loss = 3.3899e-01, PNorm = 63.5260, GNorm = 1.3242, lr_0 = 1.0362e-04
Loss = 3.7287e-01, PNorm = 63.5279, GNorm = 1.5171, lr_0 = 1.0355e-04
Loss = 3.2524e-01, PNorm = 63.5293, GNorm = 1.3003, lr_0 = 1.0348e-04
Loss = 3.4446e-01, PNorm = 63.5297, GNorm = 1.5356, lr_0 = 1.0341e-04
Loss = 3.1013e-01, PNorm = 63.5297, GNorm = 1.3754, lr_0 = 1.0334e-04
Loss = 3.7944e-01, PNorm = 63.5298, GNorm = 1.8587, lr_0 = 1.0327e-04
Loss = 3.5727e-01, PNorm = 63.5313, GNorm = 2.4063, lr_0 = 1.0320e-04
Loss = 2.9897e-01, PNorm = 63.5330, GNorm = 1.3165, lr_0 = 1.0312e-04
Loss = 3.4751e-01, PNorm = 63.5346, GNorm = 1.4404, lr_0 = 1.0305e-04
Loss = 3.1988e-01, PNorm = 63.5358, GNorm = 1.6171, lr_0 = 1.0298e-04
Loss = 3.0599e-01, PNorm = 63.5370, GNorm = 1.2443, lr_0 = 1.0291e-04
Loss = 3.8575e-01, PNorm = 63.5384, GNorm = 1.1306, lr_0 = 1.0284e-04
Loss = 3.4571e-01, PNorm = 63.5404, GNorm = 1.9607, lr_0 = 1.0277e-04
Loss = 4.5560e-01, PNorm = 63.5396, GNorm = 1.3132, lr_0 = 1.0270e-04
Loss = 3.4740e-01, PNorm = 63.5405, GNorm = 1.2890, lr_0 = 1.0263e-04
Loss = 3.2159e-01, PNorm = 63.5424, GNorm = 1.0294, lr_0 = 1.0256e-04
Loss = 3.3784e-01, PNorm = 63.5426, GNorm = 1.1770, lr_0 = 1.0249e-04
Loss = 3.3664e-01, PNorm = 63.5440, GNorm = 1.9814, lr_0 = 1.0242e-04
Loss = 3.4771e-01, PNorm = 63.5445, GNorm = 1.5477, lr_0 = 1.0235e-04
Loss = 3.4124e-01, PNorm = 63.5431, GNorm = 1.8629, lr_0 = 1.0228e-04
Loss = 3.4134e-01, PNorm = 63.5432, GNorm = 2.0014, lr_0 = 1.0221e-04
Loss = 3.4667e-01, PNorm = 63.5457, GNorm = 1.5236, lr_0 = 1.0214e-04
Loss = 3.4776e-01, PNorm = 63.5456, GNorm = 1.4697, lr_0 = 1.0207e-04
Loss = 3.0955e-01, PNorm = 63.5457, GNorm = 1.0086, lr_0 = 1.0200e-04
Loss = 3.6051e-01, PNorm = 63.5473, GNorm = 2.3918, lr_0 = 1.0193e-04
Loss = 3.2366e-01, PNorm = 63.5481, GNorm = 1.4053, lr_0 = 1.0186e-04
Loss = 3.6121e-01, PNorm = 63.5479, GNorm = 1.8799, lr_0 = 1.0179e-04
Loss = 3.2441e-01, PNorm = 63.5476, GNorm = 1.3943, lr_0 = 1.0172e-04
Loss = 3.4826e-01, PNorm = 63.5477, GNorm = 2.0963, lr_0 = 1.0165e-04
Loss = 3.1663e-01, PNorm = 63.5490, GNorm = 1.4790, lr_0 = 1.0158e-04
Loss = 3.7416e-01, PNorm = 63.5503, GNorm = 2.6513, lr_0 = 1.0151e-04
Loss = 3.5290e-01, PNorm = 63.5513, GNorm = 1.5912, lr_0 = 1.0144e-04
Loss = 3.1520e-01, PNorm = 63.5515, GNorm = 1.5187, lr_0 = 1.0137e-04
Loss = 3.6311e-01, PNorm = 63.5509, GNorm = 1.9532, lr_0 = 1.0130e-04
Loss = 3.5092e-01, PNorm = 63.5512, GNorm = 1.7503, lr_0 = 1.0123e-04
Loss = 3.5803e-01, PNorm = 63.5530, GNorm = 1.7507, lr_0 = 1.0116e-04
Loss = 3.2526e-01, PNorm = 63.5541, GNorm = 1.2994, lr_0 = 1.0110e-04
Loss = 3.3549e-01, PNorm = 63.5552, GNorm = 1.6834, lr_0 = 1.0103e-04
Loss = 3.6281e-01, PNorm = 63.5563, GNorm = 1.5999, lr_0 = 1.0096e-04
Loss = 3.3593e-01, PNorm = 63.5583, GNorm = 1.8165, lr_0 = 1.0089e-04
Loss = 4.0817e-01, PNorm = 63.5579, GNorm = 1.5718, lr_0 = 1.0082e-04
Loss = 3.4660e-01, PNorm = 63.5599, GNorm = 1.2293, lr_0 = 1.0075e-04
Loss = 3.4987e-01, PNorm = 63.5620, GNorm = 2.0044, lr_0 = 1.0068e-04
Loss = 3.2286e-01, PNorm = 63.5622, GNorm = 1.6143, lr_0 = 1.0061e-04
Loss = 3.5867e-01, PNorm = 63.5627, GNorm = 1.5067, lr_0 = 1.0054e-04
Loss = 3.5317e-01, PNorm = 63.5638, GNorm = 1.5387, lr_0 = 1.0047e-04
Loss = 3.5203e-01, PNorm = 63.5655, GNorm = 1.0646, lr_0 = 1.0041e-04
Loss = 3.1243e-01, PNorm = 63.5667, GNorm = 1.3270, lr_0 = 1.0034e-04
Loss = 3.4833e-01, PNorm = 63.5663, GNorm = 1.7145, lr_0 = 1.0027e-04
Loss = 4.0831e-01, PNorm = 63.5674, GNorm = 1.4590, lr_0 = 1.0020e-04
Loss = 3.5232e-01, PNorm = 63.5687, GNorm = 1.0702, lr_0 = 1.0013e-04
Loss = 3.2968e-01, PNorm = 63.5696, GNorm = 1.3630, lr_0 = 1.0006e-04
Loss = 2.8575e-01, PNorm = 63.5712, GNorm = 1.3062, lr_0 = 1.0000e-04
Validation mae = 0.110656
Model 0 best validation mae = 0.110656 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110007
Ensemble test mae = 0.110007
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.0912e+00, PNorm = 38.1705, GNorm = 4.3297, lr_0 = 1.0413e-04
Loss = 1.0154e+00, PNorm = 38.1702, GNorm = 2.3505, lr_0 = 1.0788e-04
Loss = 9.2830e-01, PNorm = 38.1702, GNorm = 3.8843, lr_0 = 1.1163e-04
Loss = 9.8623e-01, PNorm = 38.1706, GNorm = 4.5575, lr_0 = 1.1537e-04
Loss = 8.6913e-01, PNorm = 38.1713, GNorm = 2.2515, lr_0 = 1.1913e-04
Loss = 9.7801e-01, PNorm = 38.1726, GNorm = 2.5393, lr_0 = 1.2287e-04
Loss = 9.1593e-01, PNorm = 38.1738, GNorm = 6.7340, lr_0 = 1.2663e-04
Loss = 1.0024e+00, PNorm = 38.1750, GNorm = 7.0255, lr_0 = 1.3038e-04
Loss = 8.2241e-01, PNorm = 38.1754, GNorm = 3.6409, lr_0 = 1.3413e-04
Loss = 1.0189e+00, PNorm = 38.1757, GNorm = 2.7295, lr_0 = 1.3788e-04
Loss = 9.1401e-01, PNorm = 38.1770, GNorm = 2.3594, lr_0 = 1.4163e-04
Loss = 8.6181e-01, PNorm = 38.1781, GNorm = 2.6119, lr_0 = 1.4537e-04
Loss = 9.6187e-01, PNorm = 38.1793, GNorm = 2.4218, lr_0 = 1.4913e-04
Loss = 9.2430e-01, PNorm = 38.1811, GNorm = 3.1974, lr_0 = 1.5288e-04
Loss = 9.7398e-01, PNorm = 38.1833, GNorm = 3.0224, lr_0 = 1.5662e-04
Loss = 8.9116e-01, PNorm = 38.1862, GNorm = 4.2995, lr_0 = 1.6038e-04
Loss = 9.1698e-01, PNorm = 38.1883, GNorm = 3.8516, lr_0 = 1.6412e-04
Loss = 8.4891e-01, PNorm = 38.1905, GNorm = 2.3223, lr_0 = 1.6788e-04
Loss = 7.9113e-01, PNorm = 38.1930, GNorm = 5.4761, lr_0 = 1.7163e-04
Loss = 7.5798e-01, PNorm = 38.1957, GNorm = 2.3607, lr_0 = 1.7538e-04
Loss = 7.5242e-01, PNorm = 38.1991, GNorm = 5.1921, lr_0 = 1.7913e-04
Loss = 7.7800e-01, PNorm = 38.2028, GNorm = 3.7779, lr_0 = 1.8288e-04
Loss = 8.0357e-01, PNorm = 38.2060, GNorm = 4.0448, lr_0 = 1.8662e-04
Loss = 8.4884e-01, PNorm = 38.2078, GNorm = 6.2404, lr_0 = 1.9038e-04
Loss = 1.0449e+00, PNorm = 38.2101, GNorm = 9.2429, lr_0 = 1.9413e-04
Loss = 8.4074e-01, PNorm = 38.2126, GNorm = 4.4576, lr_0 = 1.9788e-04
Loss = 9.1562e-01, PNorm = 38.2150, GNorm = 2.5032, lr_0 = 2.0163e-04
Loss = 8.5769e-01, PNorm = 38.2172, GNorm = 3.6203, lr_0 = 2.0537e-04
Loss = 7.3519e-01, PNorm = 38.2210, GNorm = 3.7817, lr_0 = 2.0913e-04
Loss = 7.8820e-01, PNorm = 38.2246, GNorm = 3.4572, lr_0 = 2.1288e-04
Loss = 7.7083e-01, PNorm = 38.2278, GNorm = 6.4476, lr_0 = 2.1663e-04
Loss = 8.2787e-01, PNorm = 38.2313, GNorm = 2.6662, lr_0 = 2.2038e-04
Loss = 6.3051e-01, PNorm = 38.2352, GNorm = 6.4880, lr_0 = 2.2412e-04
Loss = 7.3572e-01, PNorm = 38.2368, GNorm = 2.9149, lr_0 = 2.2787e-04
Loss = 6.1514e-01, PNorm = 38.2388, GNorm = 1.4977, lr_0 = 2.3163e-04
Loss = 7.0612e-01, PNorm = 38.2418, GNorm = 9.7074, lr_0 = 2.3538e-04
Loss = 6.9054e-01, PNorm = 38.2441, GNorm = 4.7665, lr_0 = 2.3913e-04
Loss = 6.3872e-01, PNorm = 38.2472, GNorm = 3.3521, lr_0 = 2.4288e-04
Loss = 7.5318e-01, PNorm = 38.2511, GNorm = 6.2441, lr_0 = 2.4662e-04
Loss = 6.5990e-01, PNorm = 38.2548, GNorm = 1.3886, lr_0 = 2.5038e-04
Loss = 7.1125e-01, PNorm = 38.2591, GNorm = 5.9377, lr_0 = 2.5413e-04
Loss = 9.0299e-01, PNorm = 38.2618, GNorm = 2.4538, lr_0 = 2.5788e-04
Loss = 7.7089e-01, PNorm = 38.2665, GNorm = 4.5892, lr_0 = 2.6163e-04
Loss = 6.2151e-01, PNorm = 38.2709, GNorm = 5.9578, lr_0 = 2.6537e-04
Loss = 6.3110e-01, PNorm = 38.2742, GNorm = 4.6041, lr_0 = 2.6912e-04
Loss = 6.1009e-01, PNorm = 38.2769, GNorm = 1.7704, lr_0 = 2.7288e-04
Loss = 6.8426e-01, PNorm = 38.2793, GNorm = 3.1624, lr_0 = 2.7663e-04
Loss = 6.4451e-01, PNorm = 38.2830, GNorm = 3.0603, lr_0 = 2.8038e-04
Loss = 6.9146e-01, PNorm = 38.2856, GNorm = 7.7478, lr_0 = 2.8413e-04
Loss = 6.7549e-01, PNorm = 38.2890, GNorm = 5.4057, lr_0 = 2.8787e-04
Loss = 6.2695e-01, PNorm = 38.2934, GNorm = 2.0498, lr_0 = 2.9163e-04
Loss = 7.0092e-01, PNorm = 38.2958, GNorm = 5.3024, lr_0 = 2.9538e-04
Loss = 6.9694e-01, PNorm = 38.2973, GNorm = 1.5603, lr_0 = 2.9913e-04
Loss = 6.8504e-01, PNorm = 38.2996, GNorm = 1.6583, lr_0 = 3.0288e-04
Loss = 6.7540e-01, PNorm = 38.3039, GNorm = 5.2177, lr_0 = 3.0662e-04
Loss = 6.5449e-01, PNorm = 38.3081, GNorm = 4.3165, lr_0 = 3.1037e-04
Loss = 6.7021e-01, PNorm = 38.3113, GNorm = 1.0396, lr_0 = 3.1413e-04
Loss = 6.7849e-01, PNorm = 38.3153, GNorm = 2.6780, lr_0 = 3.1788e-04
Loss = 6.2429e-01, PNorm = 38.3214, GNorm = 1.3374, lr_0 = 3.2163e-04
Loss = 7.4730e-01, PNorm = 38.3259, GNorm = 1.2374, lr_0 = 3.2538e-04
Loss = 6.9065e-01, PNorm = 38.3309, GNorm = 2.6363, lr_0 = 3.2912e-04
Loss = 6.6105e-01, PNorm = 38.3362, GNorm = 1.8647, lr_0 = 3.3288e-04
Loss = 7.0192e-01, PNorm = 38.3413, GNorm = 3.4471, lr_0 = 3.3663e-04
Loss = 6.8515e-01, PNorm = 38.3438, GNorm = 1.2942, lr_0 = 3.4038e-04
Loss = 6.7210e-01, PNorm = 38.3487, GNorm = 4.8721, lr_0 = 3.4413e-04
Loss = 7.8634e-01, PNorm = 38.3555, GNorm = 9.7105, lr_0 = 3.4787e-04
Loss = 7.7477e-01, PNorm = 38.3589, GNorm = 4.3785, lr_0 = 3.5162e-04
Loss = 7.0530e-01, PNorm = 38.3667, GNorm = 4.8455, lr_0 = 3.5538e-04
Loss = 6.7609e-01, PNorm = 38.3745, GNorm = 7.3790, lr_0 = 3.5913e-04
Loss = 6.9866e-01, PNorm = 38.3760, GNorm = 2.5573, lr_0 = 3.6288e-04
Loss = 7.0473e-01, PNorm = 38.3813, GNorm = 2.3501, lr_0 = 3.6662e-04
Loss = 6.3288e-01, PNorm = 38.3866, GNorm = 2.4395, lr_0 = 3.7037e-04
Loss = 6.5199e-01, PNorm = 38.3919, GNorm = 3.3770, lr_0 = 3.7413e-04
Loss = 6.6918e-01, PNorm = 38.3998, GNorm = 7.7706, lr_0 = 3.7788e-04
Loss = 7.1416e-01, PNorm = 38.4036, GNorm = 7.6624, lr_0 = 3.8163e-04
Loss = 6.1163e-01, PNorm = 38.4124, GNorm = 3.2033, lr_0 = 3.8537e-04
Loss = 6.1799e-01, PNorm = 38.4200, GNorm = 1.4592, lr_0 = 3.8912e-04
Loss = 6.0198e-01, PNorm = 38.4245, GNorm = 1.4767, lr_0 = 3.9287e-04
Loss = 6.2128e-01, PNorm = 38.4293, GNorm = 3.7320, lr_0 = 3.9663e-04
Loss = 5.9234e-01, PNorm = 38.4334, GNorm = 1.3315, lr_0 = 4.0038e-04
Loss = 7.3230e-01, PNorm = 38.4402, GNorm = 3.7307, lr_0 = 4.0413e-04
Loss = 7.3805e-01, PNorm = 38.4452, GNorm = 4.5602, lr_0 = 4.0787e-04
Loss = 6.5292e-01, PNorm = 38.4527, GNorm = 3.7576, lr_0 = 4.1162e-04
Loss = 6.6630e-01, PNorm = 38.4637, GNorm = 1.8188, lr_0 = 4.1537e-04
Loss = 7.0480e-01, PNorm = 38.4706, GNorm = 6.6456, lr_0 = 4.1913e-04
Loss = 6.4288e-01, PNorm = 38.4767, GNorm = 3.0068, lr_0 = 4.2288e-04
Loss = 7.1014e-01, PNorm = 38.4817, GNorm = 4.3250, lr_0 = 4.2662e-04
Loss = 6.5574e-01, PNorm = 38.4926, GNorm = 2.9556, lr_0 = 4.3037e-04
Loss = 6.7215e-01, PNorm = 38.5027, GNorm = 2.2416, lr_0 = 4.3412e-04
Loss = 6.0961e-01, PNorm = 38.5092, GNorm = 3.2337, lr_0 = 4.3788e-04
Loss = 6.4108e-01, PNorm = 38.5167, GNorm = 2.8986, lr_0 = 4.4163e-04
Loss = 6.4100e-01, PNorm = 38.5245, GNorm = 5.2775, lr_0 = 4.4538e-04
Loss = 6.3838e-01, PNorm = 38.5372, GNorm = 1.3318, lr_0 = 4.4912e-04
Loss = 6.6932e-01, PNorm = 38.5424, GNorm = 1.7274, lr_0 = 4.5287e-04
Loss = 6.3279e-01, PNorm = 38.5462, GNorm = 4.0880, lr_0 = 4.5662e-04
Loss = 6.4886e-01, PNorm = 38.5516, GNorm = 3.9886, lr_0 = 4.6038e-04
Loss = 6.2866e-01, PNorm = 38.5583, GNorm = 3.9365, lr_0 = 4.6413e-04
Loss = 6.1806e-01, PNorm = 38.5618, GNorm = 1.8988, lr_0 = 4.6787e-04
Loss = 6.4774e-01, PNorm = 38.5686, GNorm = 1.6806, lr_0 = 4.7162e-04
Loss = 6.8410e-01, PNorm = 38.5787, GNorm = 1.3814, lr_0 = 4.7537e-04
Loss = 5.8008e-01, PNorm = 38.5863, GNorm = 1.9995, lr_0 = 4.7913e-04
Loss = 6.3294e-01, PNorm = 38.5961, GNorm = 1.4655, lr_0 = 4.8288e-04
Loss = 6.2953e-01, PNorm = 38.6076, GNorm = 2.3590, lr_0 = 4.8663e-04
Loss = 6.7855e-01, PNorm = 38.6165, GNorm = 2.6169, lr_0 = 4.9038e-04
Loss = 6.4628e-01, PNorm = 38.6256, GNorm = 2.3503, lr_0 = 4.9412e-04
Loss = 5.6954e-01, PNorm = 38.6357, GNorm = 5.6696, lr_0 = 4.9788e-04
Loss = 6.1730e-01, PNorm = 38.6459, GNorm = 3.4692, lr_0 = 5.0163e-04
Loss = 6.1833e-01, PNorm = 38.6540, GNorm = 3.9117, lr_0 = 5.0538e-04
Loss = 6.7744e-01, PNorm = 38.6641, GNorm = 5.4664, lr_0 = 5.0913e-04
Loss = 6.2146e-01, PNorm = 38.6713, GNorm = 0.9154, lr_0 = 5.1287e-04
Loss = 7.1669e-01, PNorm = 38.6813, GNorm = 4.9284, lr_0 = 5.1663e-04
Loss = 5.5412e-01, PNorm = 38.6946, GNorm = 3.5274, lr_0 = 5.2038e-04
Loss = 7.2287e-01, PNorm = 38.6998, GNorm = 1.3291, lr_0 = 5.2413e-04
Loss = 6.3303e-01, PNorm = 38.7106, GNorm = 1.9690, lr_0 = 5.2788e-04
Loss = 5.7669e-01, PNorm = 38.7201, GNorm = 1.8577, lr_0 = 5.3162e-04
Loss = 5.5915e-01, PNorm = 38.7298, GNorm = 2.5459, lr_0 = 5.3538e-04
Loss = 5.9188e-01, PNorm = 38.7384, GNorm = 1.3641, lr_0 = 5.3912e-04
Loss = 6.6832e-01, PNorm = 38.7442, GNorm = 1.6411, lr_0 = 5.4288e-04
Loss = 6.1696e-01, PNorm = 38.7538, GNorm = 3.3023, lr_0 = 5.4663e-04
Loss = 5.8452e-01, PNorm = 38.7660, GNorm = 2.4205, lr_0 = 5.5038e-04
Validation mae = 0.140507
Epoch 1
Loss = 5.9548e-01, PNorm = 38.7777, GNorm = 1.5728, lr_0 = 5.5413e-04
Loss = 6.0666e-01, PNorm = 38.7894, GNorm = 8.3659, lr_0 = 5.5787e-04
Loss = 6.7352e-01, PNorm = 38.7976, GNorm = 8.1326, lr_0 = 5.6163e-04
Loss = 6.5303e-01, PNorm = 38.8101, GNorm = 6.2463, lr_0 = 5.6538e-04
Loss = 6.3324e-01, PNorm = 38.8203, GNorm = 2.3231, lr_0 = 5.6913e-04
Loss = 6.1902e-01, PNorm = 38.8284, GNorm = 2.3684, lr_0 = 5.7288e-04
Loss = 5.7235e-01, PNorm = 38.8414, GNorm = 1.1886, lr_0 = 5.7662e-04
Loss = 5.9597e-01, PNorm = 38.8552, GNorm = 4.6343, lr_0 = 5.8038e-04
Loss = 6.4889e-01, PNorm = 38.8621, GNorm = 2.7630, lr_0 = 5.8413e-04
Loss = 6.4685e-01, PNorm = 38.8687, GNorm = 1.3152, lr_0 = 5.8788e-04
Loss = 5.9619e-01, PNorm = 38.8814, GNorm = 5.3965, lr_0 = 5.9163e-04
Loss = 7.1425e-01, PNorm = 38.8901, GNorm = 1.2503, lr_0 = 5.9538e-04
Loss = 6.0205e-01, PNorm = 38.9002, GNorm = 4.1600, lr_0 = 5.9913e-04
Loss = 5.6059e-01, PNorm = 38.9102, GNorm = 3.0801, lr_0 = 6.0288e-04
Loss = 5.9994e-01, PNorm = 38.9131, GNorm = 5.4741, lr_0 = 6.0663e-04
Loss = 5.4928e-01, PNorm = 38.9244, GNorm = 0.8367, lr_0 = 6.1038e-04
Loss = 5.6777e-01, PNorm = 38.9374, GNorm = 2.6423, lr_0 = 6.1413e-04
Loss = 5.5470e-01, PNorm = 38.9498, GNorm = 4.5955, lr_0 = 6.1788e-04
Loss = 6.5671e-01, PNorm = 38.9601, GNorm = 4.1783, lr_0 = 6.2163e-04
Loss = 5.8073e-01, PNorm = 38.9633, GNorm = 5.4015, lr_0 = 6.2538e-04
Loss = 5.6352e-01, PNorm = 38.9719, GNorm = 4.3382, lr_0 = 6.2913e-04
Loss = 5.9779e-01, PNorm = 38.9870, GNorm = 4.1873, lr_0 = 6.3288e-04
Loss = 5.2516e-01, PNorm = 39.0005, GNorm = 4.9222, lr_0 = 6.3663e-04
Loss = 6.3719e-01, PNorm = 39.0114, GNorm = 2.0695, lr_0 = 6.4038e-04
Loss = 6.6391e-01, PNorm = 39.0169, GNorm = 1.1473, lr_0 = 6.4413e-04
Loss = 5.8549e-01, PNorm = 39.0378, GNorm = 1.2014, lr_0 = 6.4788e-04
Loss = 6.4629e-01, PNorm = 39.0530, GNorm = 1.2606, lr_0 = 6.5163e-04
Loss = 6.6059e-01, PNorm = 39.0658, GNorm = 2.1401, lr_0 = 6.5538e-04
Loss = 6.7431e-01, PNorm = 39.0785, GNorm = 4.2592, lr_0 = 6.5913e-04
Loss = 7.1591e-01, PNorm = 39.1000, GNorm = 1.0319, lr_0 = 6.6288e-04
Loss = 5.8325e-01, PNorm = 39.1189, GNorm = 1.6584, lr_0 = 6.6663e-04
Loss = 6.6093e-01, PNorm = 39.1369, GNorm = 1.2592, lr_0 = 6.7038e-04
Loss = 5.7126e-01, PNorm = 39.1468, GNorm = 4.9467, lr_0 = 6.7413e-04
Loss = 6.2472e-01, PNorm = 39.1540, GNorm = 2.5702, lr_0 = 6.7788e-04
Loss = 4.9205e-01, PNorm = 39.1666, GNorm = 1.8519, lr_0 = 6.8163e-04
Loss = 6.2433e-01, PNorm = 39.1793, GNorm = 3.3720, lr_0 = 6.8538e-04
Loss = 6.4739e-01, PNorm = 39.1922, GNorm = 2.7126, lr_0 = 6.8913e-04
Loss = 5.9712e-01, PNorm = 39.2082, GNorm = 5.4758, lr_0 = 6.9288e-04
Loss = 6.6845e-01, PNorm = 39.2258, GNorm = 1.8703, lr_0 = 6.9663e-04
Loss = 6.0177e-01, PNorm = 39.2433, GNorm = 3.7184, lr_0 = 7.0038e-04
Loss = 6.7427e-01, PNorm = 39.2572, GNorm = 8.0347, lr_0 = 7.0413e-04
Loss = 5.6347e-01, PNorm = 39.2639, GNorm = 1.4092, lr_0 = 7.0788e-04
Loss = 6.4957e-01, PNorm = 39.2784, GNorm = 2.1098, lr_0 = 7.1163e-04
Loss = 6.6784e-01, PNorm = 39.2962, GNorm = 1.9051, lr_0 = 7.1538e-04
Loss = 5.9515e-01, PNorm = 39.3152, GNorm = 1.6651, lr_0 = 7.1913e-04
Loss = 7.2556e-01, PNorm = 39.3323, GNorm = 4.5527, lr_0 = 7.2288e-04
Loss = 5.8240e-01, PNorm = 39.3482, GNorm = 1.6288, lr_0 = 7.2663e-04
Loss = 5.4333e-01, PNorm = 39.3644, GNorm = 2.4540, lr_0 = 7.3038e-04
Loss = 6.2633e-01, PNorm = 39.3808, GNorm = 3.0913, lr_0 = 7.3413e-04
Loss = 6.4210e-01, PNorm = 39.3992, GNorm = 4.2946, lr_0 = 7.3788e-04
Loss = 6.5898e-01, PNorm = 39.4180, GNorm = 2.2645, lr_0 = 7.4163e-04
Loss = 6.0423e-01, PNorm = 39.4381, GNorm = 1.5595, lr_0 = 7.4538e-04
Loss = 6.3853e-01, PNorm = 39.4593, GNorm = 2.4024, lr_0 = 7.4913e-04
Loss = 5.1787e-01, PNorm = 39.4801, GNorm = 0.9893, lr_0 = 7.5288e-04
Loss = 5.7250e-01, PNorm = 39.4956, GNorm = 2.0806, lr_0 = 7.5663e-04
Loss = 6.1992e-01, PNorm = 39.5124, GNorm = 3.0310, lr_0 = 7.6038e-04
Loss = 6.1792e-01, PNorm = 39.5280, GNorm = 3.6054, lr_0 = 7.6413e-04
Loss = 5.8374e-01, PNorm = 39.5464, GNorm = 1.3672, lr_0 = 7.6788e-04
Loss = 5.7544e-01, PNorm = 39.5560, GNorm = 3.7095, lr_0 = 7.7163e-04
Loss = 5.8131e-01, PNorm = 39.5644, GNorm = 2.7390, lr_0 = 7.7538e-04
Loss = 6.1549e-01, PNorm = 39.5800, GNorm = 3.4367, lr_0 = 7.7913e-04
Loss = 5.6686e-01, PNorm = 39.5944, GNorm = 5.5510, lr_0 = 7.8288e-04
Loss = 6.1591e-01, PNorm = 39.6080, GNorm = 3.7123, lr_0 = 7.8663e-04
Loss = 4.8574e-01, PNorm = 39.6265, GNorm = 1.4112, lr_0 = 7.9038e-04
Loss = 5.2241e-01, PNorm = 39.6460, GNorm = 1.3846, lr_0 = 7.9413e-04
Loss = 5.5573e-01, PNorm = 39.6650, GNorm = 3.7545, lr_0 = 7.9788e-04
Loss = 6.1984e-01, PNorm = 39.6742, GNorm = 1.1532, lr_0 = 8.0163e-04
Loss = 5.5602e-01, PNorm = 39.6931, GNorm = 1.7271, lr_0 = 8.0538e-04
Loss = 5.8633e-01, PNorm = 39.7039, GNorm = 4.2410, lr_0 = 8.0913e-04
Loss = 6.1642e-01, PNorm = 39.7211, GNorm = 2.0512, lr_0 = 8.1288e-04
Loss = 5.8667e-01, PNorm = 39.7374, GNorm = 2.1239, lr_0 = 8.1663e-04
Loss = 5.5908e-01, PNorm = 39.7654, GNorm = 1.1124, lr_0 = 8.2038e-04
Loss = 6.6751e-01, PNorm = 39.7736, GNorm = 1.0957, lr_0 = 8.2413e-04
Loss = 6.6522e-01, PNorm = 39.7904, GNorm = 2.7754, lr_0 = 8.2788e-04
Loss = 6.4184e-01, PNorm = 39.8115, GNorm = 1.5542, lr_0 = 8.3163e-04
Loss = 5.0514e-01, PNorm = 39.8325, GNorm = 1.4073, lr_0 = 8.3538e-04
Loss = 6.2743e-01, PNorm = 39.8500, GNorm = 1.2832, lr_0 = 8.3913e-04
Loss = 5.3348e-01, PNorm = 39.8762, GNorm = 2.5952, lr_0 = 8.4288e-04
Loss = 4.9405e-01, PNorm = 39.9011, GNorm = 3.0947, lr_0 = 8.4663e-04
Loss = 5.8793e-01, PNorm = 39.9219, GNorm = 1.3503, lr_0 = 8.5038e-04
Loss = 5.8009e-01, PNorm = 39.9357, GNorm = 2.1658, lr_0 = 8.5413e-04
Loss = 6.0626e-01, PNorm = 39.9510, GNorm = 3.5922, lr_0 = 8.5788e-04
Loss = 5.3762e-01, PNorm = 39.9611, GNorm = 2.1457, lr_0 = 8.6163e-04
Loss = 5.9524e-01, PNorm = 39.9810, GNorm = 2.7816, lr_0 = 8.6538e-04
Loss = 6.4823e-01, PNorm = 40.0050, GNorm = 3.0743, lr_0 = 8.6913e-04
Loss = 6.2146e-01, PNorm = 40.0230, GNorm = 1.3230, lr_0 = 8.7288e-04
Loss = 5.4069e-01, PNorm = 40.0365, GNorm = 3.5037, lr_0 = 8.7663e-04
Loss = 5.6852e-01, PNorm = 40.0548, GNorm = 1.3269, lr_0 = 8.8038e-04
Loss = 6.6397e-01, PNorm = 40.0691, GNorm = 3.0641, lr_0 = 8.8413e-04
Loss = 6.3149e-01, PNorm = 40.0695, GNorm = 1.5215, lr_0 = 8.8788e-04
Loss = 5.2520e-01, PNorm = 40.0915, GNorm = 1.2122, lr_0 = 8.9163e-04
Loss = 6.0898e-01, PNorm = 40.1097, GNorm = 1.5572, lr_0 = 8.9538e-04
Loss = 5.6491e-01, PNorm = 40.1360, GNorm = 6.5031, lr_0 = 8.9913e-04
Loss = 6.5194e-01, PNorm = 40.1557, GNorm = 1.1039, lr_0 = 9.0288e-04
Loss = 5.6989e-01, PNorm = 40.1766, GNorm = 1.5322, lr_0 = 9.0663e-04
Loss = 5.2734e-01, PNorm = 40.1939, GNorm = 4.0005, lr_0 = 9.1038e-04
Loss = 5.9622e-01, PNorm = 40.2100, GNorm = 1.6076, lr_0 = 9.1413e-04
Loss = 5.4037e-01, PNorm = 40.2248, GNorm = 1.6321, lr_0 = 9.1788e-04
Loss = 5.9025e-01, PNorm = 40.2411, GNorm = 2.0078, lr_0 = 9.2163e-04
Loss = 5.2902e-01, PNorm = 40.2549, GNorm = 1.4566, lr_0 = 9.2538e-04
Loss = 5.8359e-01, PNorm = 40.2716, GNorm = 1.6818, lr_0 = 9.2913e-04
Loss = 6.9788e-01, PNorm = 40.2924, GNorm = 5.0661, lr_0 = 9.3288e-04
Loss = 4.9383e-01, PNorm = 40.3067, GNorm = 1.0473, lr_0 = 9.3663e-04
Loss = 5.1245e-01, PNorm = 40.3241, GNorm = 0.9178, lr_0 = 9.4038e-04
Loss = 6.2313e-01, PNorm = 40.3529, GNorm = 3.3643, lr_0 = 9.4413e-04
Loss = 5.7724e-01, PNorm = 40.3862, GNorm = 2.2678, lr_0 = 9.4788e-04
Loss = 5.5547e-01, PNorm = 40.4072, GNorm = 1.8708, lr_0 = 9.5163e-04
Loss = 5.8995e-01, PNorm = 40.4239, GNorm = 3.7376, lr_0 = 9.5538e-04
Loss = 6.2214e-01, PNorm = 40.4422, GNorm = 1.2673, lr_0 = 9.5913e-04
Loss = 6.1712e-01, PNorm = 40.4693, GNorm = 1.3829, lr_0 = 9.6288e-04
Loss = 5.3005e-01, PNorm = 40.4932, GNorm = 2.2831, lr_0 = 9.6663e-04
Loss = 5.3089e-01, PNorm = 40.5065, GNorm = 2.7622, lr_0 = 9.7038e-04
Loss = 5.9224e-01, PNorm = 40.5233, GNorm = 1.1765, lr_0 = 9.7413e-04
Loss = 6.5358e-01, PNorm = 40.5439, GNorm = 3.3700, lr_0 = 9.7788e-04
Loss = 5.6196e-01, PNorm = 40.5644, GNorm = 2.1651, lr_0 = 9.8163e-04
Loss = 5.6975e-01, PNorm = 40.5834, GNorm = 1.3961, lr_0 = 9.8537e-04
Loss = 6.2151e-01, PNorm = 40.6106, GNorm = 2.0118, lr_0 = 9.8912e-04
Loss = 5.1949e-01, PNorm = 40.6291, GNorm = 1.4725, lr_0 = 9.9288e-04
Loss = 5.0562e-01, PNorm = 40.6457, GNorm = 1.1962, lr_0 = 9.9663e-04
Loss = 5.5440e-01, PNorm = 40.6630, GNorm = 1.5594, lr_0 = 9.9993e-04
Validation mae = 0.129030
Epoch 2
Loss = 5.7504e-01, PNorm = 40.6836, GNorm = 1.4131, lr_0 = 9.9925e-04
Loss = 5.2936e-01, PNorm = 40.7047, GNorm = 1.5479, lr_0 = 9.9856e-04
Loss = 6.0457e-01, PNorm = 40.7160, GNorm = 2.0263, lr_0 = 9.9788e-04
Loss = 4.6323e-01, PNorm = 40.7364, GNorm = 0.9703, lr_0 = 9.9719e-04
Loss = 5.5812e-01, PNorm = 40.7607, GNorm = 4.1008, lr_0 = 9.9651e-04
Loss = 5.6518e-01, PNorm = 40.7817, GNorm = 2.8811, lr_0 = 9.9583e-04
Loss = 6.0632e-01, PNorm = 40.8156, GNorm = 1.7662, lr_0 = 9.9515e-04
Loss = 5.6216e-01, PNorm = 40.8479, GNorm = 1.1865, lr_0 = 9.9446e-04
Loss = 5.7283e-01, PNorm = 40.8743, GNorm = 1.7560, lr_0 = 9.9378e-04
Loss = 5.6505e-01, PNorm = 40.8929, GNorm = 2.4037, lr_0 = 9.9310e-04
Loss = 5.4839e-01, PNorm = 40.9070, GNorm = 3.1973, lr_0 = 9.9242e-04
Loss = 5.1928e-01, PNorm = 40.9322, GNorm = 1.7104, lr_0 = 9.9174e-04
Loss = 6.1182e-01, PNorm = 40.9532, GNorm = 2.2743, lr_0 = 9.9106e-04
Loss = 6.3512e-01, PNorm = 40.9788, GNorm = 4.9694, lr_0 = 9.9038e-04
Loss = 6.1862e-01, PNorm = 41.0038, GNorm = 1.0476, lr_0 = 9.8971e-04
Loss = 5.9551e-01, PNorm = 41.0394, GNorm = 1.7926, lr_0 = 9.8903e-04
Loss = 5.8474e-01, PNorm = 41.0666, GNorm = 1.9890, lr_0 = 9.8835e-04
Loss = 5.6909e-01, PNorm = 41.1012, GNorm = 0.9102, lr_0 = 9.8767e-04
Loss = 5.8470e-01, PNorm = 41.1211, GNorm = 1.1663, lr_0 = 9.8700e-04
Loss = 5.7609e-01, PNorm = 41.1419, GNorm = 1.3076, lr_0 = 9.8632e-04
Loss = 4.9880e-01, PNorm = 41.1666, GNorm = 1.3028, lr_0 = 9.8564e-04
Loss = 4.8381e-01, PNorm = 41.1836, GNorm = 1.2586, lr_0 = 9.8497e-04
Loss = 6.6129e-01, PNorm = 41.2086, GNorm = 4.1355, lr_0 = 9.8429e-04
Loss = 4.2912e-01, PNorm = 41.2272, GNorm = 1.5581, lr_0 = 9.8362e-04
Loss = 4.5973e-01, PNorm = 41.2451, GNorm = 1.9805, lr_0 = 9.8295e-04
Loss = 5.7903e-01, PNorm = 41.2641, GNorm = 1.4541, lr_0 = 9.8227e-04
Loss = 5.1329e-01, PNorm = 41.2786, GNorm = 2.6844, lr_0 = 9.8160e-04
Loss = 5.2097e-01, PNorm = 41.2982, GNorm = 2.0249, lr_0 = 9.8093e-04
Loss = 5.1811e-01, PNorm = 41.3165, GNorm = 1.7754, lr_0 = 9.8026e-04
Loss = 5.2127e-01, PNorm = 41.3335, GNorm = 1.0635, lr_0 = 9.7958e-04
Loss = 5.6629e-01, PNorm = 41.3504, GNorm = 1.5280, lr_0 = 9.7891e-04
Loss = 5.6926e-01, PNorm = 41.3667, GNorm = 1.4401, lr_0 = 9.7824e-04
Loss = 5.2923e-01, PNorm = 41.3866, GNorm = 3.6919, lr_0 = 9.7757e-04
Loss = 4.5281e-01, PNorm = 41.3931, GNorm = 1.5112, lr_0 = 9.7690e-04
Loss = 6.1678e-01, PNorm = 41.4091, GNorm = 0.9874, lr_0 = 9.7623e-04
Loss = 5.4135e-01, PNorm = 41.4307, GNorm = 2.0839, lr_0 = 9.7556e-04
Loss = 5.5674e-01, PNorm = 41.4491, GNorm = 2.8165, lr_0 = 9.7490e-04
Loss = 5.6681e-01, PNorm = 41.4631, GNorm = 1.0980, lr_0 = 9.7423e-04
Loss = 5.2005e-01, PNorm = 41.4762, GNorm = 0.9593, lr_0 = 9.7356e-04
Loss = 5.8023e-01, PNorm = 41.4910, GNorm = 3.1436, lr_0 = 9.7289e-04
Loss = 5.5737e-01, PNorm = 41.5198, GNorm = 1.2608, lr_0 = 9.7223e-04
Loss = 5.5182e-01, PNorm = 41.5416, GNorm = 5.0346, lr_0 = 9.7156e-04
Loss = 5.5872e-01, PNorm = 41.5694, GNorm = 0.6752, lr_0 = 9.7090e-04
Loss = 5.9783e-01, PNorm = 41.5960, GNorm = 1.1732, lr_0 = 9.7023e-04
Loss = 5.2041e-01, PNorm = 41.6094, GNorm = 3.9918, lr_0 = 9.6957e-04
Loss = 5.3138e-01, PNorm = 41.6229, GNorm = 1.9578, lr_0 = 9.6890e-04
Loss = 5.7002e-01, PNorm = 41.6449, GNorm = 0.9921, lr_0 = 9.6824e-04
Loss = 5.3524e-01, PNorm = 41.6702, GNorm = 3.7992, lr_0 = 9.6757e-04
Loss = 5.7097e-01, PNorm = 41.6933, GNorm = 1.8060, lr_0 = 9.6691e-04
Loss = 5.3177e-01, PNorm = 41.7141, GNorm = 0.9695, lr_0 = 9.6625e-04
Loss = 4.7948e-01, PNorm = 41.7477, GNorm = 1.0381, lr_0 = 9.6559e-04
Loss = 5.5823e-01, PNorm = 41.7780, GNorm = 1.1094, lr_0 = 9.6493e-04
Loss = 4.8821e-01, PNorm = 41.8036, GNorm = 3.2429, lr_0 = 9.6427e-04
Loss = 6.3617e-01, PNorm = 41.8157, GNorm = 1.8409, lr_0 = 9.6360e-04
Loss = 5.5980e-01, PNorm = 41.8411, GNorm = 1.5693, lr_0 = 9.6294e-04
Loss = 5.0855e-01, PNorm = 41.8623, GNorm = 1.0522, lr_0 = 9.6228e-04
Loss = 5.4733e-01, PNorm = 41.8834, GNorm = 1.4728, lr_0 = 9.6163e-04
Loss = 5.7422e-01, PNorm = 41.8993, GNorm = 0.9468, lr_0 = 9.6097e-04
Loss = 5.9359e-01, PNorm = 41.9226, GNorm = 2.6924, lr_0 = 9.6031e-04
Loss = 5.7840e-01, PNorm = 41.9449, GNorm = 1.2448, lr_0 = 9.5965e-04
Loss = 4.9137e-01, PNorm = 41.9640, GNorm = 2.9955, lr_0 = 9.5899e-04
Loss = 5.6432e-01, PNorm = 41.9884, GNorm = 2.2487, lr_0 = 9.5834e-04
Loss = 5.0275e-01, PNorm = 42.0102, GNorm = 1.5569, lr_0 = 9.5768e-04
Loss = 5.7779e-01, PNorm = 42.0321, GNorm = 1.6391, lr_0 = 9.5702e-04
Loss = 6.5103e-01, PNorm = 42.0525, GNorm = 4.6499, lr_0 = 9.5637e-04
Loss = 5.3888e-01, PNorm = 42.0784, GNorm = 1.8860, lr_0 = 9.5571e-04
Loss = 5.7097e-01, PNorm = 42.0987, GNorm = 2.7778, lr_0 = 9.5506e-04
Loss = 5.4798e-01, PNorm = 42.1229, GNorm = 3.5945, lr_0 = 9.5440e-04
Loss = 6.2132e-01, PNorm = 42.1437, GNorm = 1.0663, lr_0 = 9.5375e-04
Loss = 5.5807e-01, PNorm = 42.1765, GNorm = 2.0015, lr_0 = 9.5310e-04
Loss = 5.5656e-01, PNorm = 42.1949, GNorm = 1.9394, lr_0 = 9.5244e-04
Loss = 5.2174e-01, PNorm = 42.2109, GNorm = 1.5742, lr_0 = 9.5179e-04
Loss = 5.1816e-01, PNorm = 42.2272, GNorm = 2.2676, lr_0 = 9.5114e-04
Loss = 5.9016e-01, PNorm = 42.2471, GNorm = 2.4530, lr_0 = 9.5049e-04
Loss = 6.5003e-01, PNorm = 42.2693, GNorm = 1.8291, lr_0 = 9.4984e-04
Loss = 5.4369e-01, PNorm = 42.2952, GNorm = 3.7181, lr_0 = 9.4919e-04
Loss = 5.8700e-01, PNorm = 42.3171, GNorm = 1.4301, lr_0 = 9.4854e-04
Loss = 4.6968e-01, PNorm = 42.3533, GNorm = 1.1074, lr_0 = 9.4789e-04
Loss = 6.4250e-01, PNorm = 42.3867, GNorm = 4.2216, lr_0 = 9.4724e-04
Loss = 5.7611e-01, PNorm = 42.4155, GNorm = 1.6799, lr_0 = 9.4659e-04
Loss = 5.1738e-01, PNorm = 42.4329, GNorm = 1.9103, lr_0 = 9.4594e-04
Loss = 5.4154e-01, PNorm = 42.4515, GNorm = 4.6182, lr_0 = 9.4529e-04
Loss = 4.8627e-01, PNorm = 42.4742, GNorm = 1.3354, lr_0 = 9.4464e-04
Loss = 5.3351e-01, PNorm = 42.4949, GNorm = 1.5364, lr_0 = 9.4400e-04
Loss = 5.3147e-01, PNorm = 42.5089, GNorm = 1.0292, lr_0 = 9.4335e-04
Loss = 5.6436e-01, PNorm = 42.5283, GNorm = 2.6737, lr_0 = 9.4270e-04
Loss = 4.8241e-01, PNorm = 42.5444, GNorm = 1.0685, lr_0 = 9.4206e-04
Loss = 5.6260e-01, PNorm = 42.5625, GNorm = 1.5978, lr_0 = 9.4141e-04
Loss = 4.9874e-01, PNorm = 42.5823, GNorm = 0.9702, lr_0 = 9.4077e-04
Loss = 5.1681e-01, PNorm = 42.5979, GNorm = 2.2313, lr_0 = 9.4012e-04
Loss = 5.6017e-01, PNorm = 42.6130, GNorm = 2.4429, lr_0 = 9.3948e-04
Loss = 5.6903e-01, PNorm = 42.6360, GNorm = 2.6026, lr_0 = 9.3884e-04
Loss = 5.4264e-01, PNorm = 42.6472, GNorm = 1.4816, lr_0 = 9.3819e-04
Loss = 4.8425e-01, PNorm = 42.6614, GNorm = 1.1339, lr_0 = 9.3755e-04
Loss = 5.6596e-01, PNorm = 42.6886, GNorm = 1.9164, lr_0 = 9.3691e-04
Loss = 6.0238e-01, PNorm = 42.7073, GNorm = 2.7645, lr_0 = 9.3627e-04
Loss = 4.9783e-01, PNorm = 42.7288, GNorm = 1.2089, lr_0 = 9.3562e-04
Loss = 5.0607e-01, PNorm = 42.7486, GNorm = 2.4592, lr_0 = 9.3498e-04
Loss = 5.6592e-01, PNorm = 42.7689, GNorm = 2.3134, lr_0 = 9.3434e-04
Loss = 4.8948e-01, PNorm = 42.7830, GNorm = 2.2996, lr_0 = 9.3370e-04
Loss = 5.2303e-01, PNorm = 42.7991, GNorm = 1.4185, lr_0 = 9.3306e-04
Loss = 5.0463e-01, PNorm = 42.8111, GNorm = 0.9830, lr_0 = 9.3242e-04
Loss = 5.3186e-01, PNorm = 42.8393, GNorm = 1.4952, lr_0 = 9.3178e-04
Loss = 5.6956e-01, PNorm = 42.8545, GNorm = 1.7293, lr_0 = 9.3115e-04
Loss = 5.0288e-01, PNorm = 42.8795, GNorm = 2.7037, lr_0 = 9.3051e-04
Loss = 5.1308e-01, PNorm = 42.9032, GNorm = 1.8954, lr_0 = 9.2987e-04
Loss = 4.9212e-01, PNorm = 42.9134, GNorm = 2.4187, lr_0 = 9.2923e-04
Loss = 5.6645e-01, PNorm = 42.9250, GNorm = 1.7680, lr_0 = 9.2860e-04
Loss = 5.8001e-01, PNorm = 42.9434, GNorm = 2.1232, lr_0 = 9.2796e-04
Loss = 5.2226e-01, PNorm = 42.9676, GNorm = 1.3692, lr_0 = 9.2733e-04
Loss = 5.0210e-01, PNorm = 42.9905, GNorm = 1.0851, lr_0 = 9.2669e-04
Loss = 5.0764e-01, PNorm = 43.0094, GNorm = 1.4704, lr_0 = 9.2606e-04
Loss = 6.1492e-01, PNorm = 43.0203, GNorm = 2.7477, lr_0 = 9.2542e-04
Loss = 5.0334e-01, PNorm = 43.0393, GNorm = 1.7655, lr_0 = 9.2479e-04
Loss = 5.0623e-01, PNorm = 43.0581, GNorm = 1.2921, lr_0 = 9.2415e-04
Loss = 5.3646e-01, PNorm = 43.0745, GNorm = 1.1648, lr_0 = 9.2352e-04
Loss = 4.8631e-01, PNorm = 43.0858, GNorm = 1.6307, lr_0 = 9.2289e-04
Loss = 4.3341e-01, PNorm = 43.1042, GNorm = 1.4233, lr_0 = 9.2226e-04
Loss = 5.3931e-01, PNorm = 43.1197, GNorm = 1.2962, lr_0 = 9.2162e-04
Loss = 5.2038e-01, PNorm = 43.1406, GNorm = 1.6151, lr_0 = 9.2099e-04
Validation mae = 0.127656
Epoch 3
Loss = 4.7316e-01, PNorm = 43.1606, GNorm = 1.3942, lr_0 = 9.2036e-04
Loss = 5.5273e-01, PNorm = 43.1912, GNorm = 1.5338, lr_0 = 9.1973e-04
Loss = 4.8053e-01, PNorm = 43.2130, GNorm = 2.0341, lr_0 = 9.1910e-04
Loss = 4.3953e-01, PNorm = 43.2291, GNorm = 1.4939, lr_0 = 9.1847e-04
Loss = 5.9637e-01, PNorm = 43.2487, GNorm = 3.4433, lr_0 = 9.1784e-04
Loss = 6.4807e-01, PNorm = 43.2787, GNorm = 1.6588, lr_0 = 9.1721e-04
Loss = 5.2093e-01, PNorm = 43.3074, GNorm = 2.4074, lr_0 = 9.1658e-04
Loss = 4.3599e-01, PNorm = 43.3262, GNorm = 1.3437, lr_0 = 9.1596e-04
Loss = 5.5023e-01, PNorm = 43.3477, GNorm = 1.1096, lr_0 = 9.1533e-04
Loss = 5.7875e-01, PNorm = 43.3650, GNorm = 1.3201, lr_0 = 9.1470e-04
Loss = 5.4547e-01, PNorm = 43.3866, GNorm = 1.4507, lr_0 = 9.1408e-04
Loss = 5.0609e-01, PNorm = 43.4175, GNorm = 1.5091, lr_0 = 9.1345e-04
Loss = 5.8927e-01, PNorm = 43.4340, GNorm = 1.5407, lr_0 = 9.1282e-04
Loss = 5.2744e-01, PNorm = 43.4578, GNorm = 1.8352, lr_0 = 9.1220e-04
Loss = 5.5110e-01, PNorm = 43.4747, GNorm = 2.1930, lr_0 = 9.1157e-04
Loss = 4.6375e-01, PNorm = 43.4877, GNorm = 2.0213, lr_0 = 9.1095e-04
Loss = 5.2304e-01, PNorm = 43.5111, GNorm = 1.8824, lr_0 = 9.1032e-04
Loss = 4.9886e-01, PNorm = 43.5303, GNorm = 3.5394, lr_0 = 9.0970e-04
Loss = 5.3723e-01, PNorm = 43.5534, GNorm = 1.6298, lr_0 = 9.0908e-04
Loss = 5.2049e-01, PNorm = 43.5720, GNorm = 1.1079, lr_0 = 9.0846e-04
Loss = 5.5684e-01, PNorm = 43.5905, GNorm = 4.1567, lr_0 = 9.0783e-04
Loss = 4.9781e-01, PNorm = 43.6127, GNorm = 3.2290, lr_0 = 9.0721e-04
Loss = 5.1314e-01, PNorm = 43.6344, GNorm = 0.9899, lr_0 = 9.0659e-04
Loss = 5.6262e-01, PNorm = 43.6593, GNorm = 1.7817, lr_0 = 9.0597e-04
Loss = 4.1175e-01, PNorm = 43.6820, GNorm = 1.2318, lr_0 = 9.0535e-04
Loss = 5.0548e-01, PNorm = 43.6918, GNorm = 1.8459, lr_0 = 9.0473e-04
Loss = 5.1372e-01, PNorm = 43.7137, GNorm = 1.2136, lr_0 = 9.0411e-04
Loss = 5.4669e-01, PNorm = 43.7315, GNorm = 1.2704, lr_0 = 9.0349e-04
Loss = 5.5886e-01, PNorm = 43.7522, GNorm = 1.3215, lr_0 = 9.0287e-04
Loss = 5.1660e-01, PNorm = 43.7808, GNorm = 0.9423, lr_0 = 9.0225e-04
Loss = 4.5955e-01, PNorm = 43.7968, GNorm = 1.6765, lr_0 = 9.0163e-04
Loss = 5.7451e-01, PNorm = 43.8264, GNorm = 1.7223, lr_0 = 9.0102e-04
Loss = 5.6272e-01, PNorm = 43.8462, GNorm = 1.3687, lr_0 = 9.0040e-04
Loss = 5.2157e-01, PNorm = 43.8652, GNorm = 2.0161, lr_0 = 8.9978e-04
Loss = 4.9832e-01, PNorm = 43.8869, GNorm = 2.8203, lr_0 = 8.9916e-04
Loss = 5.6607e-01, PNorm = 43.9065, GNorm = 1.3585, lr_0 = 8.9855e-04
Loss = 4.4014e-01, PNorm = 43.9243, GNorm = 1.2396, lr_0 = 8.9793e-04
Loss = 5.5748e-01, PNorm = 43.9427, GNorm = 2.6510, lr_0 = 8.9732e-04
Loss = 5.0943e-01, PNorm = 43.9405, GNorm = 3.1143, lr_0 = 8.9670e-04
Loss = 5.1068e-01, PNorm = 43.9591, GNorm = 1.0677, lr_0 = 8.9609e-04
Loss = 5.7401e-01, PNorm = 43.9867, GNorm = 1.8567, lr_0 = 8.9548e-04
Loss = 5.0380e-01, PNorm = 44.0119, GNorm = 1.6032, lr_0 = 8.9486e-04
Loss = 5.0282e-01, PNorm = 44.0458, GNorm = 2.1851, lr_0 = 8.9425e-04
Loss = 5.4965e-01, PNorm = 44.0600, GNorm = 1.1257, lr_0 = 8.9364e-04
Loss = 4.9736e-01, PNorm = 44.0813, GNorm = 1.0267, lr_0 = 8.9302e-04
Loss = 4.7579e-01, PNorm = 44.1070, GNorm = 2.0705, lr_0 = 8.9241e-04
Loss = 4.7367e-01, PNorm = 44.1250, GNorm = 1.2011, lr_0 = 8.9180e-04
Loss = 5.4107e-01, PNorm = 44.1477, GNorm = 1.9546, lr_0 = 8.9119e-04
Loss = 5.2565e-01, PNorm = 44.1667, GNorm = 1.5989, lr_0 = 8.9058e-04
Loss = 5.5061e-01, PNorm = 44.1966, GNorm = 1.7732, lr_0 = 8.8997e-04
Loss = 4.7879e-01, PNorm = 44.2144, GNorm = 1.3367, lr_0 = 8.8936e-04
Loss = 5.2904e-01, PNorm = 44.2368, GNorm = 0.9289, lr_0 = 8.8875e-04
Loss = 5.1192e-01, PNorm = 44.2680, GNorm = 2.3309, lr_0 = 8.8814e-04
Loss = 5.0347e-01, PNorm = 44.2866, GNorm = 1.2916, lr_0 = 8.8753e-04
Loss = 4.7615e-01, PNorm = 44.3138, GNorm = 2.2293, lr_0 = 8.8693e-04
Loss = 4.9755e-01, PNorm = 44.3433, GNorm = 1.5025, lr_0 = 8.8632e-04
Loss = 5.3016e-01, PNorm = 44.3687, GNorm = 4.1341, lr_0 = 8.8571e-04
Loss = 5.5782e-01, PNorm = 44.3809, GNorm = 0.8927, lr_0 = 8.8510e-04
Loss = 4.7530e-01, PNorm = 44.4093, GNorm = 0.9755, lr_0 = 8.8450e-04
Loss = 4.8773e-01, PNorm = 44.4373, GNorm = 1.8347, lr_0 = 8.8389e-04
Loss = 5.2241e-01, PNorm = 44.4548, GNorm = 2.3248, lr_0 = 8.8329e-04
Loss = 5.5584e-01, PNorm = 44.4789, GNorm = 2.7322, lr_0 = 8.8268e-04
Loss = 5.4377e-01, PNorm = 44.5090, GNorm = 0.9891, lr_0 = 8.8208e-04
Loss = 5.0640e-01, PNorm = 44.5300, GNorm = 1.3662, lr_0 = 8.8147e-04
Loss = 5.0480e-01, PNorm = 44.5508, GNorm = 2.0289, lr_0 = 8.8087e-04
Loss = 5.0623e-01, PNorm = 44.5751, GNorm = 2.0967, lr_0 = 8.8026e-04
Loss = 4.8107e-01, PNorm = 44.6041, GNorm = 1.8727, lr_0 = 8.7966e-04
Loss = 5.1043e-01, PNorm = 44.6142, GNorm = 0.8530, lr_0 = 8.7906e-04
Loss = 4.8458e-01, PNorm = 44.6298, GNorm = 3.0335, lr_0 = 8.7846e-04
Loss = 4.4120e-01, PNorm = 44.6404, GNorm = 2.9071, lr_0 = 8.7785e-04
Loss = 4.7289e-01, PNorm = 44.6579, GNorm = 1.1457, lr_0 = 8.7725e-04
Loss = 4.6236e-01, PNorm = 44.6851, GNorm = 1.4415, lr_0 = 8.7665e-04
Loss = 4.2781e-01, PNorm = 44.6958, GNorm = 1.2728, lr_0 = 8.7605e-04
Loss = 4.8567e-01, PNorm = 44.7034, GNorm = 0.9876, lr_0 = 8.7545e-04
Loss = 5.5388e-01, PNorm = 44.7186, GNorm = 1.6285, lr_0 = 8.7485e-04
Loss = 4.7412e-01, PNorm = 44.7289, GNorm = 1.5032, lr_0 = 8.7425e-04
Loss = 4.8742e-01, PNorm = 44.7425, GNorm = 1.9332, lr_0 = 8.7365e-04
Loss = 5.3369e-01, PNorm = 44.7712, GNorm = 0.9984, lr_0 = 8.7306e-04
Loss = 4.3980e-01, PNorm = 44.7853, GNorm = 0.9994, lr_0 = 8.7246e-04
Loss = 4.4979e-01, PNorm = 44.7987, GNorm = 1.3375, lr_0 = 8.7186e-04
Loss = 4.7554e-01, PNorm = 44.8124, GNorm = 0.9983, lr_0 = 8.7126e-04
Loss = 4.5369e-01, PNorm = 44.8193, GNorm = 1.1275, lr_0 = 8.7067e-04
Loss = 4.9553e-01, PNorm = 44.8415, GNorm = 1.4244, lr_0 = 8.7007e-04
Loss = 5.7601e-01, PNorm = 44.8604, GNorm = 1.0518, lr_0 = 8.6947e-04
Loss = 5.4902e-01, PNorm = 44.8789, GNorm = 1.0957, lr_0 = 8.6888e-04
Loss = 4.8834e-01, PNorm = 44.8920, GNorm = 1.5166, lr_0 = 8.6828e-04
Loss = 4.8319e-01, PNorm = 44.9042, GNorm = 2.2879, lr_0 = 8.6769e-04
Loss = 5.2599e-01, PNorm = 44.9271, GNorm = 2.3760, lr_0 = 8.6709e-04
Loss = 4.7682e-01, PNorm = 44.9445, GNorm = 1.2893, lr_0 = 8.6650e-04
Loss = 5.2094e-01, PNorm = 44.9595, GNorm = 2.1274, lr_0 = 8.6590e-04
Loss = 5.5707e-01, PNorm = 44.9823, GNorm = 1.6255, lr_0 = 8.6531e-04
Loss = 5.2007e-01, PNorm = 45.0039, GNorm = 1.0433, lr_0 = 8.6472e-04
Loss = 5.4403e-01, PNorm = 45.0194, GNorm = 1.9859, lr_0 = 8.6413e-04
Loss = 4.1870e-01, PNorm = 45.0348, GNorm = 2.2883, lr_0 = 8.6353e-04
Loss = 4.9502e-01, PNorm = 45.0548, GNorm = 1.2261, lr_0 = 8.6294e-04
Loss = 5.3626e-01, PNorm = 45.0723, GNorm = 1.9754, lr_0 = 8.6235e-04
Loss = 5.4774e-01, PNorm = 45.0880, GNorm = 1.4699, lr_0 = 8.6176e-04
Loss = 4.8908e-01, PNorm = 45.1055, GNorm = 1.0244, lr_0 = 8.6117e-04
Loss = 4.9193e-01, PNorm = 45.1237, GNorm = 1.8154, lr_0 = 8.6058e-04
Loss = 4.4970e-01, PNorm = 45.1396, GNorm = 2.1001, lr_0 = 8.5999e-04
Loss = 5.3737e-01, PNorm = 45.1661, GNorm = 1.1095, lr_0 = 8.5940e-04
Loss = 5.1848e-01, PNorm = 45.1901, GNorm = 1.1217, lr_0 = 8.5881e-04
Loss = 6.0434e-01, PNorm = 45.2092, GNorm = 1.0282, lr_0 = 8.5823e-04
Loss = 5.2652e-01, PNorm = 45.2308, GNorm = 1.5039, lr_0 = 8.5764e-04
Loss = 4.8363e-01, PNorm = 45.2531, GNorm = 1.9582, lr_0 = 8.5705e-04
Loss = 4.7804e-01, PNorm = 45.2753, GNorm = 1.2357, lr_0 = 8.5646e-04
Loss = 4.5958e-01, PNorm = 45.2945, GNorm = 1.3884, lr_0 = 8.5588e-04
Loss = 4.7249e-01, PNorm = 45.3090, GNorm = 1.0934, lr_0 = 8.5529e-04
Loss = 5.0303e-01, PNorm = 45.3326, GNorm = 2.2222, lr_0 = 8.5470e-04
Loss = 4.7009e-01, PNorm = 45.3468, GNorm = 1.1515, lr_0 = 8.5412e-04
Loss = 5.1187e-01, PNorm = 45.3564, GNorm = 1.6967, lr_0 = 8.5353e-04
Loss = 5.9142e-01, PNorm = 45.3703, GNorm = 1.1725, lr_0 = 8.5295e-04
Loss = 5.4776e-01, PNorm = 45.3918, GNorm = 1.3578, lr_0 = 8.5236e-04
Loss = 4.7873e-01, PNorm = 45.4136, GNorm = 0.9422, lr_0 = 8.5178e-04
Loss = 5.1704e-01, PNorm = 45.4386, GNorm = 2.0143, lr_0 = 8.5120e-04
Loss = 5.6018e-01, PNorm = 45.4475, GNorm = 1.8273, lr_0 = 8.5061e-04
Loss = 4.6004e-01, PNorm = 45.4613, GNorm = 1.2857, lr_0 = 8.5003e-04
Loss = 4.4786e-01, PNorm = 45.4811, GNorm = 1.0418, lr_0 = 8.4945e-04
Loss = 4.8244e-01, PNorm = 45.5029, GNorm = 1.0607, lr_0 = 8.4887e-04
Loss = 4.8402e-01, PNorm = 45.5149, GNorm = 1.8668, lr_0 = 8.4828e-04
Validation mae = 0.123721
Epoch 4
Loss = 4.8576e-01, PNorm = 45.5289, GNorm = 1.4535, lr_0 = 8.4770e-04
Loss = 4.7503e-01, PNorm = 45.5525, GNorm = 1.6555, lr_0 = 8.4712e-04
Loss = 5.0381e-01, PNorm = 45.5715, GNorm = 1.2240, lr_0 = 8.4654e-04
Loss = 5.1569e-01, PNorm = 45.5876, GNorm = 1.6472, lr_0 = 8.4596e-04
Loss = 5.2744e-01, PNorm = 45.6101, GNorm = 2.7013, lr_0 = 8.4538e-04
Loss = 5.2251e-01, PNorm = 45.6365, GNorm = 3.4331, lr_0 = 8.4480e-04
Loss = 4.7744e-01, PNorm = 45.6608, GNorm = 1.2807, lr_0 = 8.4423e-04
Loss = 4.2565e-01, PNorm = 45.6788, GNorm = 0.8610, lr_0 = 8.4365e-04
Loss = 5.2364e-01, PNorm = 45.7007, GNorm = 0.8248, lr_0 = 8.4307e-04
Loss = 4.3172e-01, PNorm = 45.7227, GNorm = 1.2389, lr_0 = 8.4249e-04
Loss = 5.5687e-01, PNorm = 45.7398, GNorm = 1.5796, lr_0 = 8.4191e-04
Loss = 4.9430e-01, PNorm = 45.7520, GNorm = 1.0293, lr_0 = 8.4134e-04
Loss = 5.6962e-01, PNorm = 45.7736, GNorm = 2.1193, lr_0 = 8.4076e-04
Loss = 4.6071e-01, PNorm = 45.7963, GNorm = 2.2889, lr_0 = 8.4019e-04
Loss = 4.7351e-01, PNorm = 45.8220, GNorm = 1.1207, lr_0 = 8.3961e-04
Loss = 4.8569e-01, PNorm = 45.8483, GNorm = 2.2437, lr_0 = 8.3903e-04
Loss = 4.5869e-01, PNorm = 45.8662, GNorm = 1.1197, lr_0 = 8.3846e-04
Loss = 4.8182e-01, PNorm = 45.8872, GNorm = 1.7485, lr_0 = 8.3789e-04
Loss = 4.2317e-01, PNorm = 45.9078, GNorm = 1.2737, lr_0 = 8.3731e-04
Loss = 4.8415e-01, PNorm = 45.9293, GNorm = 1.2289, lr_0 = 8.3674e-04
Loss = 4.7029e-01, PNorm = 45.9559, GNorm = 1.5934, lr_0 = 8.3616e-04
Loss = 4.7071e-01, PNorm = 45.9767, GNorm = 1.1485, lr_0 = 8.3559e-04
Loss = 5.4310e-01, PNorm = 46.0018, GNorm = 2.3197, lr_0 = 8.3502e-04
Loss = 5.2025e-01, PNorm = 46.0326, GNorm = 1.3662, lr_0 = 8.3445e-04
Loss = 5.1675e-01, PNorm = 46.0510, GNorm = 1.6137, lr_0 = 8.3388e-04
Loss = 4.5190e-01, PNorm = 46.0705, GNorm = 2.6187, lr_0 = 8.3330e-04
Loss = 4.7975e-01, PNorm = 46.0930, GNorm = 1.3635, lr_0 = 8.3273e-04
Loss = 4.6242e-01, PNorm = 46.1149, GNorm = 1.7188, lr_0 = 8.3216e-04
Loss = 4.5561e-01, PNorm = 46.1369, GNorm = 2.2366, lr_0 = 8.3159e-04
Loss = 5.1012e-01, PNorm = 46.1572, GNorm = 1.3608, lr_0 = 8.3102e-04
Loss = 4.6079e-01, PNorm = 46.1805, GNorm = 1.0968, lr_0 = 8.3045e-04
Loss = 4.9238e-01, PNorm = 46.2071, GNorm = 1.3308, lr_0 = 8.2988e-04
Loss = 4.8694e-01, PNorm = 46.2203, GNorm = 1.1139, lr_0 = 8.2932e-04
Loss = 4.4200e-01, PNorm = 46.2355, GNorm = 1.6007, lr_0 = 8.2875e-04
Loss = 5.2512e-01, PNorm = 46.2574, GNorm = 5.1008, lr_0 = 8.2818e-04
Loss = 5.2988e-01, PNorm = 46.2849, GNorm = 1.2055, lr_0 = 8.2761e-04
Loss = 4.5267e-01, PNorm = 46.3047, GNorm = 1.1495, lr_0 = 8.2705e-04
Loss = 5.1079e-01, PNorm = 46.3181, GNorm = 1.1690, lr_0 = 8.2648e-04
Loss = 5.0183e-01, PNorm = 46.3284, GNorm = 2.9048, lr_0 = 8.2591e-04
Loss = 5.1684e-01, PNorm = 46.3415, GNorm = 1.2905, lr_0 = 8.2535e-04
Loss = 5.1431e-01, PNorm = 46.3545, GNorm = 1.5564, lr_0 = 8.2478e-04
Loss = 4.5079e-01, PNorm = 46.3780, GNorm = 1.5653, lr_0 = 8.2422e-04
Loss = 5.0008e-01, PNorm = 46.3996, GNorm = 0.9868, lr_0 = 8.2365e-04
Loss = 5.3415e-01, PNorm = 46.4137, GNorm = 1.4978, lr_0 = 8.2309e-04
Loss = 5.2573e-01, PNorm = 46.4302, GNorm = 0.9348, lr_0 = 8.2252e-04
Loss = 4.4289e-01, PNorm = 46.4488, GNorm = 1.1244, lr_0 = 8.2196e-04
Loss = 5.3664e-01, PNorm = 46.4721, GNorm = 1.4629, lr_0 = 8.2140e-04
Loss = 5.3691e-01, PNorm = 46.4907, GNorm = 2.2740, lr_0 = 8.2084e-04
Loss = 5.2157e-01, PNorm = 46.5103, GNorm = 1.6567, lr_0 = 8.2027e-04
Loss = 4.6819e-01, PNorm = 46.5408, GNorm = 1.7248, lr_0 = 8.1971e-04
Loss = 4.5870e-01, PNorm = 46.5593, GNorm = 1.0912, lr_0 = 8.1915e-04
Loss = 4.7008e-01, PNorm = 46.5755, GNorm = 1.0922, lr_0 = 8.1859e-04
Loss = 4.6648e-01, PNorm = 46.5976, GNorm = 1.2084, lr_0 = 8.1803e-04
Loss = 5.1655e-01, PNorm = 46.6280, GNorm = 1.7191, lr_0 = 8.1747e-04
Loss = 4.9551e-01, PNorm = 46.6524, GNorm = 2.6791, lr_0 = 8.1691e-04
Loss = 4.3232e-01, PNorm = 46.6784, GNorm = 1.2600, lr_0 = 8.1635e-04
Loss = 4.7033e-01, PNorm = 46.6959, GNorm = 1.7052, lr_0 = 8.1579e-04
Loss = 5.0238e-01, PNorm = 46.7202, GNorm = 0.7960, lr_0 = 8.1523e-04
Loss = 4.5124e-01, PNorm = 46.7433, GNorm = 1.5124, lr_0 = 8.1467e-04
Loss = 4.8346e-01, PNorm = 46.7527, GNorm = 0.7697, lr_0 = 8.1411e-04
Loss = 5.2353e-01, PNorm = 46.7640, GNorm = 1.1551, lr_0 = 8.1355e-04
Loss = 5.1701e-01, PNorm = 46.7860, GNorm = 1.2526, lr_0 = 8.1300e-04
Loss = 4.4816e-01, PNorm = 46.8015, GNorm = 3.1707, lr_0 = 8.1244e-04
Loss = 5.7231e-01, PNorm = 46.8235, GNorm = 1.1271, lr_0 = 8.1188e-04
Loss = 5.2528e-01, PNorm = 46.8593, GNorm = 1.4339, lr_0 = 8.1133e-04
Loss = 4.4133e-01, PNorm = 46.8820, GNorm = 1.1866, lr_0 = 8.1077e-04
Loss = 5.0460e-01, PNorm = 46.8946, GNorm = 1.3257, lr_0 = 8.1022e-04
Loss = 4.6505e-01, PNorm = 46.9131, GNorm = 1.2496, lr_0 = 8.0966e-04
Loss = 5.1007e-01, PNorm = 46.9328, GNorm = 0.9771, lr_0 = 8.0911e-04
Loss = 5.1784e-01, PNorm = 46.9541, GNorm = 1.7647, lr_0 = 8.0855e-04
Loss = 5.1383e-01, PNorm = 46.9702, GNorm = 2.1735, lr_0 = 8.0800e-04
Loss = 5.0028e-01, PNorm = 46.9897, GNorm = 1.0704, lr_0 = 8.0745e-04
Loss = 4.5277e-01, PNorm = 47.0051, GNorm = 1.6927, lr_0 = 8.0689e-04
Loss = 5.2206e-01, PNorm = 47.0166, GNorm = 0.7946, lr_0 = 8.0634e-04
Loss = 5.0271e-01, PNorm = 47.0313, GNorm = 1.3836, lr_0 = 8.0579e-04
Loss = 5.2118e-01, PNorm = 47.0501, GNorm = 1.4595, lr_0 = 8.0523e-04
Loss = 4.7236e-01, PNorm = 47.0641, GNorm = 0.9856, lr_0 = 8.0468e-04
Loss = 4.4766e-01, PNorm = 47.0822, GNorm = 1.4834, lr_0 = 8.0413e-04
Loss = 5.0131e-01, PNorm = 47.1003, GNorm = 1.6399, lr_0 = 8.0358e-04
Loss = 4.0193e-01, PNorm = 47.1096, GNorm = 1.1889, lr_0 = 8.0303e-04
Loss = 4.6805e-01, PNorm = 47.1234, GNorm = 1.5509, lr_0 = 8.0248e-04
Loss = 5.2193e-01, PNorm = 47.1363, GNorm = 1.6997, lr_0 = 8.0193e-04
Loss = 4.7207e-01, PNorm = 47.1470, GNorm = 0.9303, lr_0 = 8.0138e-04
Loss = 5.5608e-01, PNorm = 47.1698, GNorm = 1.5894, lr_0 = 8.0083e-04
Loss = 5.2934e-01, PNorm = 47.1923, GNorm = 1.2059, lr_0 = 8.0028e-04
Loss = 5.1595e-01, PNorm = 47.2179, GNorm = 1.6917, lr_0 = 7.9974e-04
Loss = 4.7721e-01, PNorm = 47.2426, GNorm = 2.3683, lr_0 = 7.9919e-04
Loss = 4.7254e-01, PNorm = 47.2549, GNorm = 1.3111, lr_0 = 7.9864e-04
Loss = 4.7900e-01, PNorm = 47.2722, GNorm = 0.8740, lr_0 = 7.9809e-04
Loss = 5.1834e-01, PNorm = 47.2904, GNorm = 1.2702, lr_0 = 7.9755e-04
Loss = 4.8944e-01, PNorm = 47.3101, GNorm = 1.7504, lr_0 = 7.9700e-04
Loss = 4.2571e-01, PNorm = 47.3275, GNorm = 1.3910, lr_0 = 7.9645e-04
Loss = 4.8951e-01, PNorm = 47.3380, GNorm = 1.2872, lr_0 = 7.9591e-04
Loss = 5.0361e-01, PNorm = 47.3574, GNorm = 2.2587, lr_0 = 7.9536e-04
Loss = 4.7927e-01, PNorm = 47.3742, GNorm = 1.7437, lr_0 = 7.9482e-04
Loss = 5.3537e-01, PNorm = 47.3896, GNorm = 1.2527, lr_0 = 7.9427e-04
Loss = 5.7657e-01, PNorm = 47.4116, GNorm = 1.2010, lr_0 = 7.9373e-04
Loss = 4.2477e-01, PNorm = 47.4398, GNorm = 1.1597, lr_0 = 7.9319e-04
Loss = 4.8435e-01, PNorm = 47.4523, GNorm = 0.9257, lr_0 = 7.9264e-04
Loss = 4.7044e-01, PNorm = 47.4713, GNorm = 1.2615, lr_0 = 7.9210e-04
Loss = 4.0812e-01, PNorm = 47.4825, GNorm = 1.6265, lr_0 = 7.9156e-04
Loss = 4.1771e-01, PNorm = 47.4995, GNorm = 1.1430, lr_0 = 7.9101e-04
Loss = 4.4208e-01, PNorm = 47.5138, GNorm = 1.5903, lr_0 = 7.9047e-04
Loss = 4.3689e-01, PNorm = 47.5305, GNorm = 1.5724, lr_0 = 7.8993e-04
Loss = 4.6017e-01, PNorm = 47.5439, GNorm = 1.7765, lr_0 = 7.8939e-04
Loss = 4.4327e-01, PNorm = 47.5625, GNorm = 1.6714, lr_0 = 7.8885e-04
Loss = 5.3600e-01, PNorm = 47.5800, GNorm = 1.7052, lr_0 = 7.8831e-04
Loss = 4.3076e-01, PNorm = 47.5976, GNorm = 1.1778, lr_0 = 7.8777e-04
Loss = 5.4623e-01, PNorm = 47.6228, GNorm = 2.1983, lr_0 = 7.8723e-04
Loss = 5.1292e-01, PNorm = 47.6417, GNorm = 1.7698, lr_0 = 7.8669e-04
Loss = 5.0591e-01, PNorm = 47.6601, GNorm = 1.5067, lr_0 = 7.8615e-04
Loss = 5.2798e-01, PNorm = 47.6854, GNorm = 2.3880, lr_0 = 7.8561e-04
Loss = 5.0890e-01, PNorm = 47.7024, GNorm = 1.8171, lr_0 = 7.8507e-04
Loss = 5.0887e-01, PNorm = 47.7248, GNorm = 1.0753, lr_0 = 7.8454e-04
Loss = 4.3532e-01, PNorm = 47.7486, GNorm = 0.9468, lr_0 = 7.8400e-04
Loss = 4.9901e-01, PNorm = 47.7662, GNorm = 2.2202, lr_0 = 7.8346e-04
Loss = 4.2582e-01, PNorm = 47.7861, GNorm = 1.4272, lr_0 = 7.8293e-04
Loss = 4.8411e-01, PNorm = 47.8026, GNorm = 1.4862, lr_0 = 7.8239e-04
Loss = 4.8592e-01, PNorm = 47.8200, GNorm = 1.4244, lr_0 = 7.8185e-04
Loss = 4.7841e-01, PNorm = 47.8408, GNorm = 1.2873, lr_0 = 7.8132e-04
Validation mae = 0.119897
Epoch 5
Loss = 4.5662e-01, PNorm = 47.8549, GNorm = 1.3657, lr_0 = 7.8078e-04
Loss = 4.5556e-01, PNorm = 47.8698, GNorm = 1.0323, lr_0 = 7.8025e-04
Loss = 5.1512e-01, PNorm = 47.8821, GNorm = 1.0639, lr_0 = 7.7971e-04
Loss = 4.7174e-01, PNorm = 47.9004, GNorm = 1.4152, lr_0 = 7.7918e-04
Loss = 4.3440e-01, PNorm = 47.9129, GNorm = 0.9414, lr_0 = 7.7864e-04
Loss = 4.5451e-01, PNorm = 47.9270, GNorm = 2.2225, lr_0 = 7.7811e-04
Loss = 4.8056e-01, PNorm = 47.9457, GNorm = 1.4151, lr_0 = 7.7758e-04
Loss = 4.7876e-01, PNorm = 47.9642, GNorm = 2.1828, lr_0 = 7.7705e-04
Loss = 5.0900e-01, PNorm = 47.9903, GNorm = 2.1856, lr_0 = 7.7651e-04
Loss = 4.2406e-01, PNorm = 48.0080, GNorm = 1.2980, lr_0 = 7.7598e-04
Loss = 3.8387e-01, PNorm = 48.0299, GNorm = 1.6369, lr_0 = 7.7545e-04
Loss = 4.2018e-01, PNorm = 48.0468, GNorm = 1.2260, lr_0 = 7.7492e-04
Loss = 4.8366e-01, PNorm = 48.0639, GNorm = 1.0681, lr_0 = 7.7439e-04
Loss = 4.9480e-01, PNorm = 48.0819, GNorm = 1.6041, lr_0 = 7.7386e-04
Loss = 5.0441e-01, PNorm = 48.1040, GNorm = 2.4762, lr_0 = 7.7333e-04
Loss = 4.4477e-01, PNorm = 48.1272, GNorm = 1.0351, lr_0 = 7.7280e-04
Loss = 4.2769e-01, PNorm = 48.1516, GNorm = 0.7904, lr_0 = 7.7227e-04
Loss = 4.6065e-01, PNorm = 48.1727, GNorm = 1.5515, lr_0 = 7.7174e-04
Loss = 4.8776e-01, PNorm = 48.1845, GNorm = 1.9101, lr_0 = 7.7121e-04
Loss = 5.0642e-01, PNorm = 48.2016, GNorm = 1.8142, lr_0 = 7.7068e-04
Loss = 4.9110e-01, PNorm = 48.2229, GNorm = 0.8136, lr_0 = 7.7015e-04
Loss = 4.3335e-01, PNorm = 48.2454, GNorm = 2.3444, lr_0 = 7.6963e-04
Loss = 4.6630e-01, PNorm = 48.2614, GNorm = 1.2116, lr_0 = 7.6910e-04
Loss = 4.9837e-01, PNorm = 48.2764, GNorm = 1.3507, lr_0 = 7.6857e-04
Loss = 3.9271e-01, PNorm = 48.3011, GNorm = 1.5319, lr_0 = 7.6805e-04
Loss = 5.1759e-01, PNorm = 48.3192, GNorm = 1.3798, lr_0 = 7.6752e-04
Loss = 4.3069e-01, PNorm = 48.3371, GNorm = 2.4812, lr_0 = 7.6699e-04
Loss = 5.4255e-01, PNorm = 48.3649, GNorm = 1.5863, lr_0 = 7.6647e-04
Loss = 4.9253e-01, PNorm = 48.3836, GNorm = 1.1768, lr_0 = 7.6594e-04
Loss = 4.8319e-01, PNorm = 48.4036, GNorm = 2.7175, lr_0 = 7.6542e-04
Loss = 4.5011e-01, PNorm = 48.4204, GNorm = 1.1188, lr_0 = 7.6489e-04
Loss = 4.9295e-01, PNorm = 48.4458, GNorm = 0.8507, lr_0 = 7.6437e-04
Loss = 4.9027e-01, PNorm = 48.4659, GNorm = 1.3530, lr_0 = 7.6385e-04
Loss = 4.1012e-01, PNorm = 48.4773, GNorm = 0.9020, lr_0 = 7.6332e-04
Loss = 4.0013e-01, PNorm = 48.4986, GNorm = 1.4336, lr_0 = 7.6280e-04
Loss = 4.9057e-01, PNorm = 48.5268, GNorm = 1.3969, lr_0 = 7.6228e-04
Loss = 4.7316e-01, PNorm = 48.5472, GNorm = 1.2885, lr_0 = 7.6176e-04
Loss = 4.6293e-01, PNorm = 48.5662, GNorm = 1.0998, lr_0 = 7.6123e-04
Loss = 4.7429e-01, PNorm = 48.5808, GNorm = 1.4637, lr_0 = 7.6071e-04
Loss = 4.6798e-01, PNorm = 48.5966, GNorm = 1.3290, lr_0 = 7.6019e-04
Loss = 4.5870e-01, PNorm = 48.6123, GNorm = 1.4565, lr_0 = 7.5967e-04
Loss = 4.5288e-01, PNorm = 48.6213, GNorm = 0.7494, lr_0 = 7.5915e-04
Loss = 5.0073e-01, PNorm = 48.6369, GNorm = 2.4592, lr_0 = 7.5863e-04
Loss = 4.9335e-01, PNorm = 48.6651, GNorm = 1.9741, lr_0 = 7.5811e-04
Loss = 5.0808e-01, PNorm = 48.6823, GNorm = 2.3133, lr_0 = 7.5759e-04
Loss = 5.4925e-01, PNorm = 48.7093, GNorm = 1.8469, lr_0 = 7.5707e-04
Loss = 4.2571e-01, PNorm = 48.7342, GNorm = 2.5417, lr_0 = 7.5655e-04
Loss = 4.2955e-01, PNorm = 48.7460, GNorm = 1.3576, lr_0 = 7.5603e-04
Loss = 5.2923e-01, PNorm = 48.7615, GNorm = 1.9235, lr_0 = 7.5552e-04
Loss = 4.8417e-01, PNorm = 48.7777, GNorm = 1.7621, lr_0 = 7.5500e-04
Loss = 4.1963e-01, PNorm = 48.7930, GNorm = 1.3801, lr_0 = 7.5448e-04
Loss = 4.6151e-01, PNorm = 48.8039, GNorm = 1.0746, lr_0 = 7.5397e-04
Loss = 4.6439e-01, PNorm = 48.8208, GNorm = 2.2307, lr_0 = 7.5345e-04
Loss = 5.2289e-01, PNorm = 48.8389, GNorm = 1.4420, lr_0 = 7.5293e-04
Loss = 4.9493e-01, PNorm = 48.8545, GNorm = 1.3740, lr_0 = 7.5242e-04
Loss = 4.3325e-01, PNorm = 48.8777, GNorm = 1.5537, lr_0 = 7.5190e-04
Loss = 4.1404e-01, PNorm = 48.8977, GNorm = 1.5950, lr_0 = 7.5139e-04
Loss = 4.8016e-01, PNorm = 48.9118, GNorm = 0.8883, lr_0 = 7.5087e-04
Loss = 4.4070e-01, PNorm = 48.9391, GNorm = 1.8310, lr_0 = 7.5036e-04
Loss = 4.9690e-01, PNorm = 48.9544, GNorm = 1.4958, lr_0 = 7.4984e-04
Loss = 5.0707e-01, PNorm = 48.9751, GNorm = 1.1818, lr_0 = 7.4933e-04
Loss = 4.7956e-01, PNorm = 48.9967, GNorm = 1.0384, lr_0 = 7.4882e-04
Loss = 4.8842e-01, PNorm = 49.0146, GNorm = 1.4478, lr_0 = 7.4830e-04
Loss = 4.3969e-01, PNorm = 49.0422, GNorm = 1.8620, lr_0 = 7.4779e-04
Loss = 4.4373e-01, PNorm = 49.0599, GNorm = 1.1063, lr_0 = 7.4728e-04
Loss = 4.9898e-01, PNorm = 49.0773, GNorm = 1.3251, lr_0 = 7.4677e-04
Loss = 4.4081e-01, PNorm = 49.0910, GNorm = 1.6980, lr_0 = 7.4625e-04
Loss = 5.1778e-01, PNorm = 49.1020, GNorm = 2.1954, lr_0 = 7.4574e-04
Loss = 4.8562e-01, PNorm = 49.1156, GNorm = 1.9525, lr_0 = 7.4523e-04
Loss = 4.9640e-01, PNorm = 49.1370, GNorm = 2.2100, lr_0 = 7.4472e-04
Loss = 4.7790e-01, PNorm = 49.1585, GNorm = 1.1929, lr_0 = 7.4421e-04
Loss = 5.2813e-01, PNorm = 49.1793, GNorm = 1.9562, lr_0 = 7.4370e-04
Loss = 5.0988e-01, PNorm = 49.1994, GNorm = 1.9966, lr_0 = 7.4319e-04
Loss = 5.2956e-01, PNorm = 49.2164, GNorm = 1.0036, lr_0 = 7.4268e-04
Loss = 4.7688e-01, PNorm = 49.2254, GNorm = 0.7716, lr_0 = 7.4217e-04
Loss = 4.2271e-01, PNorm = 49.2418, GNorm = 0.9027, lr_0 = 7.4167e-04
Loss = 4.9409e-01, PNorm = 49.2620, GNorm = 2.1520, lr_0 = 7.4116e-04
Loss = 4.2700e-01, PNorm = 49.2778, GNorm = 0.9847, lr_0 = 7.4065e-04
Loss = 4.2589e-01, PNorm = 49.2962, GNorm = 1.3378, lr_0 = 7.4014e-04
Loss = 4.4194e-01, PNorm = 49.3160, GNorm = 2.8788, lr_0 = 7.3964e-04
Loss = 4.5367e-01, PNorm = 49.3323, GNorm = 1.0669, lr_0 = 7.3913e-04
Loss = 4.7066e-01, PNorm = 49.3461, GNorm = 1.5564, lr_0 = 7.3862e-04
Loss = 3.8680e-01, PNorm = 49.3595, GNorm = 0.9683, lr_0 = 7.3812e-04
Loss = 4.4416e-01, PNorm = 49.3687, GNorm = 1.4217, lr_0 = 7.3761e-04
Loss = 4.3465e-01, PNorm = 49.3832, GNorm = 1.2883, lr_0 = 7.3711e-04
Loss = 5.1888e-01, PNorm = 49.3994, GNorm = 1.1179, lr_0 = 7.3660e-04
Loss = 4.2510e-01, PNorm = 49.4154, GNorm = 1.4320, lr_0 = 7.3610e-04
Loss = 4.6577e-01, PNorm = 49.4321, GNorm = 0.9648, lr_0 = 7.3559e-04
Loss = 4.8174e-01, PNorm = 49.4503, GNorm = 1.0904, lr_0 = 7.3509e-04
Loss = 4.5872e-01, PNorm = 49.4675, GNorm = 1.3151, lr_0 = 7.3458e-04
Loss = 4.6536e-01, PNorm = 49.4839, GNorm = 1.3132, lr_0 = 7.3408e-04
Loss = 5.2278e-01, PNorm = 49.5066, GNorm = 1.6093, lr_0 = 7.3358e-04
Loss = 4.3862e-01, PNorm = 49.5241, GNorm = 1.1802, lr_0 = 7.3308e-04
Loss = 4.6259e-01, PNorm = 49.5364, GNorm = 1.0251, lr_0 = 7.3257e-04
Loss = 4.5439e-01, PNorm = 49.5495, GNorm = 2.4714, lr_0 = 7.3207e-04
Loss = 4.7741e-01, PNorm = 49.5657, GNorm = 1.8619, lr_0 = 7.3157e-04
Loss = 4.4585e-01, PNorm = 49.5735, GNorm = 0.9703, lr_0 = 7.3107e-04
Loss = 4.1905e-01, PNorm = 49.5828, GNorm = 0.8911, lr_0 = 7.3057e-04
Loss = 4.6708e-01, PNorm = 49.5926, GNorm = 1.1420, lr_0 = 7.3007e-04
Loss = 5.8549e-01, PNorm = 49.6209, GNorm = 1.7490, lr_0 = 7.2957e-04
Loss = 4.2711e-01, PNorm = 49.6469, GNorm = 1.2943, lr_0 = 7.2907e-04
Loss = 4.2856e-01, PNorm = 49.6726, GNorm = 1.4006, lr_0 = 7.2857e-04
Loss = 4.2830e-01, PNorm = 49.6923, GNorm = 1.4442, lr_0 = 7.2807e-04
Loss = 4.2101e-01, PNorm = 49.7123, GNorm = 1.1397, lr_0 = 7.2757e-04
Loss = 4.3378e-01, PNorm = 49.7219, GNorm = 1.1034, lr_0 = 7.2707e-04
Loss = 4.3732e-01, PNorm = 49.7333, GNorm = 1.7373, lr_0 = 7.2657e-04
Loss = 4.6723e-01, PNorm = 49.7509, GNorm = 1.3159, lr_0 = 7.2608e-04
Loss = 5.3360e-01, PNorm = 49.7640, GNorm = 1.4448, lr_0 = 7.2558e-04
Loss = 5.0319e-01, PNorm = 49.7835, GNorm = 1.9803, lr_0 = 7.2508e-04
Loss = 4.0512e-01, PNorm = 49.8044, GNorm = 1.2161, lr_0 = 7.2458e-04
Loss = 4.3605e-01, PNorm = 49.8254, GNorm = 1.7139, lr_0 = 7.2409e-04
Loss = 4.4132e-01, PNorm = 49.8437, GNorm = 1.2391, lr_0 = 7.2359e-04
Loss = 4.7769e-01, PNorm = 49.8560, GNorm = 2.5200, lr_0 = 7.2310e-04
Loss = 5.0092e-01, PNorm = 49.8565, GNorm = 1.1140, lr_0 = 7.2260e-04
Loss = 4.6879e-01, PNorm = 49.8709, GNorm = 1.3490, lr_0 = 7.2211e-04
Loss = 4.3885e-01, PNorm = 49.8896, GNorm = 1.3373, lr_0 = 7.2161e-04
Loss = 5.2384e-01, PNorm = 49.9048, GNorm = 2.0868, lr_0 = 7.2112e-04
Loss = 4.6531e-01, PNorm = 49.9178, GNorm = 1.9987, lr_0 = 7.2062e-04
Loss = 5.1319e-01, PNorm = 49.9355, GNorm = 1.1302, lr_0 = 7.2013e-04
Loss = 4.4157e-01, PNorm = 49.9495, GNorm = 1.2047, lr_0 = 7.1964e-04
Validation mae = 0.119859
Epoch 6
Loss = 4.9025e-01, PNorm = 49.9682, GNorm = 1.0870, lr_0 = 7.1914e-04
Loss = 4.5041e-01, PNorm = 49.9888, GNorm = 1.2888, lr_0 = 7.1865e-04
Loss = 4.3565e-01, PNorm = 50.0055, GNorm = 0.9190, lr_0 = 7.1816e-04
Loss = 4.8245e-01, PNorm = 50.0172, GNorm = 2.1426, lr_0 = 7.1767e-04
Loss = 4.5254e-01, PNorm = 50.0343, GNorm = 1.0873, lr_0 = 7.1717e-04
Loss = 4.4851e-01, PNorm = 50.0498, GNorm = 2.0014, lr_0 = 7.1668e-04
Loss = 4.7205e-01, PNorm = 50.0687, GNorm = 2.3892, lr_0 = 7.1619e-04
Loss = 3.9572e-01, PNorm = 50.0821, GNorm = 1.0142, lr_0 = 7.1570e-04
Loss = 4.4754e-01, PNorm = 50.0981, GNorm = 1.7648, lr_0 = 7.1521e-04
Loss = 3.7204e-01, PNorm = 50.1072, GNorm = 0.9942, lr_0 = 7.1472e-04
Loss = 4.5795e-01, PNorm = 50.1238, GNorm = 0.8870, lr_0 = 7.1423e-04
Loss = 4.5742e-01, PNorm = 50.1450, GNorm = 1.6225, lr_0 = 7.1374e-04
Loss = 4.6134e-01, PNorm = 50.1650, GNorm = 2.1673, lr_0 = 7.1325e-04
Loss = 4.2689e-01, PNorm = 50.1792, GNorm = 1.9621, lr_0 = 7.1277e-04
Loss = 4.7967e-01, PNorm = 50.1890, GNorm = 1.0036, lr_0 = 7.1228e-04
Loss = 4.9607e-01, PNorm = 50.2061, GNorm = 0.8189, lr_0 = 7.1179e-04
Loss = 4.8965e-01, PNorm = 50.2221, GNorm = 1.1923, lr_0 = 7.1130e-04
Loss = 4.2567e-01, PNorm = 50.2402, GNorm = 1.3154, lr_0 = 7.1081e-04
Loss = 4.3282e-01, PNorm = 50.2547, GNorm = 1.5401, lr_0 = 7.1033e-04
Loss = 4.5280e-01, PNorm = 50.2684, GNorm = 1.6493, lr_0 = 7.0984e-04
Loss = 4.5853e-01, PNorm = 50.2906, GNorm = 1.4659, lr_0 = 7.0935e-04
Loss = 4.6140e-01, PNorm = 50.3100, GNorm = 1.3569, lr_0 = 7.0887e-04
Loss = 4.4548e-01, PNorm = 50.3300, GNorm = 1.0867, lr_0 = 7.0838e-04
Loss = 4.1265e-01, PNorm = 50.3389, GNorm = 1.0330, lr_0 = 7.0790e-04
Loss = 4.6179e-01, PNorm = 50.3526, GNorm = 2.3313, lr_0 = 7.0741e-04
Loss = 4.7487e-01, PNorm = 50.3689, GNorm = 1.4504, lr_0 = 7.0693e-04
Loss = 4.5819e-01, PNorm = 50.3851, GNorm = 2.0206, lr_0 = 7.0644e-04
Loss = 4.4666e-01, PNorm = 50.3946, GNorm = 1.4425, lr_0 = 7.0596e-04
Loss = 4.5431e-01, PNorm = 50.4121, GNorm = 1.2039, lr_0 = 7.0548e-04
Loss = 5.0338e-01, PNorm = 50.4283, GNorm = 1.5482, lr_0 = 7.0499e-04
Loss = 4.4574e-01, PNorm = 50.4419, GNorm = 1.2031, lr_0 = 7.0451e-04
Loss = 3.9401e-01, PNorm = 50.4525, GNorm = 0.9829, lr_0 = 7.0403e-04
Loss = 4.4484e-01, PNorm = 50.4675, GNorm = 1.4050, lr_0 = 7.0354e-04
Loss = 4.2992e-01, PNorm = 50.4784, GNorm = 1.0172, lr_0 = 7.0306e-04
Loss = 4.4321e-01, PNorm = 50.4935, GNorm = 1.3788, lr_0 = 7.0258e-04
Loss = 4.3966e-01, PNorm = 50.5057, GNorm = 1.0388, lr_0 = 7.0210e-04
Loss = 5.0252e-01, PNorm = 50.5165, GNorm = 1.1747, lr_0 = 7.0162e-04
Loss = 4.3858e-01, PNorm = 50.5350, GNorm = 1.5411, lr_0 = 7.0114e-04
Loss = 4.1392e-01, PNorm = 50.5450, GNorm = 1.2570, lr_0 = 7.0066e-04
Loss = 4.5687e-01, PNorm = 50.5617, GNorm = 2.0981, lr_0 = 7.0018e-04
Loss = 3.9940e-01, PNorm = 50.5756, GNorm = 1.0020, lr_0 = 6.9970e-04
Loss = 4.5491e-01, PNorm = 50.5887, GNorm = 1.0520, lr_0 = 6.9922e-04
Loss = 3.9591e-01, PNorm = 50.5959, GNorm = 1.6827, lr_0 = 6.9874e-04
Loss = 4.3742e-01, PNorm = 50.6111, GNorm = 1.0557, lr_0 = 6.9826e-04
Loss = 5.0359e-01, PNorm = 50.6236, GNorm = 1.0831, lr_0 = 6.9778e-04
Loss = 5.4433e-01, PNorm = 50.6355, GNorm = 1.5441, lr_0 = 6.9730e-04
Loss = 4.7886e-01, PNorm = 50.6487, GNorm = 1.2701, lr_0 = 6.9683e-04
Loss = 4.6608e-01, PNorm = 50.6629, GNorm = 1.1341, lr_0 = 6.9635e-04
Loss = 4.4472e-01, PNorm = 50.6784, GNorm = 1.6679, lr_0 = 6.9587e-04
Loss = 4.2771e-01, PNorm = 50.6925, GNorm = 1.4544, lr_0 = 6.9540e-04
Loss = 4.3478e-01, PNorm = 50.7077, GNorm = 1.1625, lr_0 = 6.9492e-04
Loss = 4.5905e-01, PNorm = 50.7224, GNorm = 1.4133, lr_0 = 6.9444e-04
Loss = 4.3187e-01, PNorm = 50.7308, GNorm = 1.1111, lr_0 = 6.9397e-04
Loss = 4.1511e-01, PNorm = 50.7479, GNorm = 1.4871, lr_0 = 6.9349e-04
Loss = 4.4416e-01, PNorm = 50.7583, GNorm = 1.3166, lr_0 = 6.9302e-04
Loss = 3.8513e-01, PNorm = 50.7665, GNorm = 1.0656, lr_0 = 6.9254e-04
Loss = 3.7981e-01, PNorm = 50.7774, GNorm = 0.8053, lr_0 = 6.9207e-04
Loss = 4.5510e-01, PNorm = 50.7888, GNorm = 1.3181, lr_0 = 6.9159e-04
Loss = 4.6736e-01, PNorm = 50.7981, GNorm = 1.2708, lr_0 = 6.9112e-04
Loss = 4.4958e-01, PNorm = 50.8124, GNorm = 1.5277, lr_0 = 6.9065e-04
Loss = 4.5300e-01, PNorm = 50.8363, GNorm = 1.2208, lr_0 = 6.9017e-04
Loss = 4.4430e-01, PNorm = 50.8534, GNorm = 1.5561, lr_0 = 6.8970e-04
Loss = 5.0500e-01, PNorm = 50.8684, GNorm = 1.2396, lr_0 = 6.8923e-04
Loss = 5.1386e-01, PNorm = 50.8861, GNorm = 1.4910, lr_0 = 6.8876e-04
Loss = 4.8912e-01, PNorm = 50.8959, GNorm = 2.8169, lr_0 = 6.8828e-04
Loss = 4.9486e-01, PNorm = 50.9071, GNorm = 1.2483, lr_0 = 6.8781e-04
Loss = 3.8503e-01, PNorm = 50.9277, GNorm = 2.0000, lr_0 = 6.8734e-04
Loss = 5.3621e-01, PNorm = 50.9496, GNorm = 1.2120, lr_0 = 6.8687e-04
Loss = 4.9194e-01, PNorm = 50.9691, GNorm = 1.9900, lr_0 = 6.8640e-04
Loss = 4.2469e-01, PNorm = 50.9927, GNorm = 2.0503, lr_0 = 6.8593e-04
Loss = 4.3337e-01, PNorm = 51.0073, GNorm = 1.4071, lr_0 = 6.8546e-04
Loss = 4.2839e-01, PNorm = 51.0140, GNorm = 1.5580, lr_0 = 6.8499e-04
Loss = 4.5772e-01, PNorm = 51.0260, GNorm = 1.0814, lr_0 = 6.8452e-04
Loss = 4.5692e-01, PNorm = 51.0411, GNorm = 1.3512, lr_0 = 6.8405e-04
Loss = 4.5850e-01, PNorm = 51.0610, GNorm = 1.2873, lr_0 = 6.8358e-04
Loss = 3.8032e-01, PNorm = 51.0795, GNorm = 0.8398, lr_0 = 6.8312e-04
Loss = 4.1603e-01, PNorm = 51.0942, GNorm = 2.1399, lr_0 = 6.8265e-04
Loss = 4.1269e-01, PNorm = 51.1049, GNorm = 1.2437, lr_0 = 6.8218e-04
Loss = 4.8423e-01, PNorm = 51.1168, GNorm = 1.6299, lr_0 = 6.8171e-04
Loss = 5.0085e-01, PNorm = 51.1283, GNorm = 2.9683, lr_0 = 6.8125e-04
Loss = 5.0071e-01, PNorm = 51.1463, GNorm = 1.1748, lr_0 = 6.8078e-04
Loss = 4.5332e-01, PNorm = 51.1672, GNorm = 1.5683, lr_0 = 6.8031e-04
Loss = 4.0458e-01, PNorm = 51.1801, GNorm = 1.7488, lr_0 = 6.7985e-04
Loss = 4.2251e-01, PNorm = 51.1931, GNorm = 1.0287, lr_0 = 6.7938e-04
Loss = 4.3692e-01, PNorm = 51.2043, GNorm = 1.5017, lr_0 = 6.7892e-04
Loss = 4.7105e-01, PNorm = 51.2214, GNorm = 2.0941, lr_0 = 6.7845e-04
Loss = 4.7095e-01, PNorm = 51.2346, GNorm = 2.0219, lr_0 = 6.7799e-04
Loss = 4.5033e-01, PNorm = 51.2485, GNorm = 1.4689, lr_0 = 6.7752e-04
Loss = 4.7267e-01, PNorm = 51.2611, GNorm = 1.2717, lr_0 = 6.7706e-04
Loss = 5.2674e-01, PNorm = 51.2811, GNorm = 2.2272, lr_0 = 6.7659e-04
Loss = 4.5375e-01, PNorm = 51.2971, GNorm = 1.1652, lr_0 = 6.7613e-04
Loss = 3.9611e-01, PNorm = 51.3133, GNorm = 1.1377, lr_0 = 6.7567e-04
Loss = 4.6157e-01, PNorm = 51.3297, GNorm = 1.7416, lr_0 = 6.7520e-04
Loss = 5.0492e-01, PNorm = 51.3527, GNorm = 1.7330, lr_0 = 6.7474e-04
Loss = 4.3791e-01, PNorm = 51.3650, GNorm = 1.0333, lr_0 = 6.7428e-04
Loss = 4.2295e-01, PNorm = 51.3775, GNorm = 1.1855, lr_0 = 6.7382e-04
Loss = 4.6453e-01, PNorm = 51.3949, GNorm = 1.1541, lr_0 = 6.7335e-04
Loss = 5.0481e-01, PNorm = 51.4090, GNorm = 0.9891, lr_0 = 6.7289e-04
Loss = 4.5555e-01, PNorm = 51.4221, GNorm = 1.2998, lr_0 = 6.7243e-04
Loss = 5.8016e-01, PNorm = 51.4398, GNorm = 3.0840, lr_0 = 6.7197e-04
Loss = 4.6301e-01, PNorm = 51.4491, GNorm = 1.1191, lr_0 = 6.7151e-04
Loss = 4.5122e-01, PNorm = 51.4657, GNorm = 0.8627, lr_0 = 6.7105e-04
Loss = 4.7260e-01, PNorm = 51.4802, GNorm = 1.2071, lr_0 = 6.7059e-04
Loss = 4.8722e-01, PNorm = 51.4910, GNorm = 1.1574, lr_0 = 6.7013e-04
Loss = 4.0667e-01, PNorm = 51.5059, GNorm = 0.9346, lr_0 = 6.6967e-04
Loss = 4.2880e-01, PNorm = 51.5152, GNorm = 0.9306, lr_0 = 6.6921e-04
Loss = 4.5535e-01, PNorm = 51.5293, GNorm = 2.0296, lr_0 = 6.6876e-04
Loss = 4.1670e-01, PNorm = 51.5438, GNorm = 0.9228, lr_0 = 6.6830e-04
Loss = 4.6944e-01, PNorm = 51.5578, GNorm = 1.2253, lr_0 = 6.6784e-04
Loss = 4.2576e-01, PNorm = 51.5689, GNorm = 1.3037, lr_0 = 6.6738e-04
Loss = 4.6835e-01, PNorm = 51.5906, GNorm = 1.5113, lr_0 = 6.6693e-04
Loss = 4.2865e-01, PNorm = 51.6102, GNorm = 0.9245, lr_0 = 6.6647e-04
Loss = 4.2288e-01, PNorm = 51.6223, GNorm = 1.1803, lr_0 = 6.6601e-04
Loss = 4.2914e-01, PNorm = 51.6381, GNorm = 1.4565, lr_0 = 6.6556e-04
Loss = 3.8989e-01, PNorm = 51.6587, GNorm = 1.2933, lr_0 = 6.6510e-04
Loss = 4.8421e-01, PNorm = 51.6726, GNorm = 2.8418, lr_0 = 6.6464e-04
Loss = 4.0863e-01, PNorm = 51.6890, GNorm = 1.0123, lr_0 = 6.6419e-04
Loss = 4.4327e-01, PNorm = 51.7091, GNorm = 1.3420, lr_0 = 6.6373e-04
Loss = 4.5629e-01, PNorm = 51.7262, GNorm = 1.5871, lr_0 = 6.6328e-04
Loss = 4.9074e-01, PNorm = 51.7406, GNorm = 1.4693, lr_0 = 6.6282e-04
Validation mae = 0.119858
Epoch 7
Loss = 4.5377e-01, PNorm = 51.7594, GNorm = 0.9867, lr_0 = 6.6237e-04
Loss = 4.0103e-01, PNorm = 51.7774, GNorm = 1.2144, lr_0 = 6.6192e-04
Loss = 4.3556e-01, PNorm = 51.7912, GNorm = 1.3413, lr_0 = 6.6146e-04
Loss = 4.3864e-01, PNorm = 51.8033, GNorm = 1.3121, lr_0 = 6.6101e-04
Loss = 4.5560e-01, PNorm = 51.8120, GNorm = 1.4903, lr_0 = 6.6056e-04
Loss = 4.4365e-01, PNorm = 51.8277, GNorm = 1.2923, lr_0 = 6.6011e-04
Loss = 4.4392e-01, PNorm = 51.8518, GNorm = 1.5078, lr_0 = 6.5965e-04
Loss = 4.4431e-01, PNorm = 51.8659, GNorm = 1.0571, lr_0 = 6.5920e-04
Loss = 4.0909e-01, PNorm = 51.8847, GNorm = 1.5247, lr_0 = 6.5875e-04
Loss = 3.5113e-01, PNorm = 51.8910, GNorm = 1.2052, lr_0 = 6.5830e-04
Loss = 4.3650e-01, PNorm = 51.8997, GNorm = 1.2148, lr_0 = 6.5785e-04
Loss = 4.5310e-01, PNorm = 51.9164, GNorm = 2.1138, lr_0 = 6.5740e-04
Loss = 4.3005e-01, PNorm = 51.9327, GNorm = 1.3258, lr_0 = 6.5695e-04
Loss = 4.4603e-01, PNorm = 51.9443, GNorm = 1.2729, lr_0 = 6.5650e-04
Loss = 4.1601e-01, PNorm = 51.9586, GNorm = 2.1112, lr_0 = 6.5605e-04
Loss = 3.8557e-01, PNorm = 51.9729, GNorm = 1.3168, lr_0 = 6.5560e-04
Loss = 4.0723e-01, PNorm = 51.9925, GNorm = 1.3814, lr_0 = 6.5515e-04
Loss = 4.1520e-01, PNorm = 52.0092, GNorm = 1.0236, lr_0 = 6.5470e-04
Loss = 4.4367e-01, PNorm = 52.0271, GNorm = 0.9837, lr_0 = 6.5425e-04
Loss = 4.1907e-01, PNorm = 52.0428, GNorm = 1.3603, lr_0 = 6.5380e-04
Loss = 4.7229e-01, PNorm = 52.0589, GNorm = 1.1081, lr_0 = 6.5335e-04
Loss = 4.8398e-01, PNorm = 52.0740, GNorm = 2.1851, lr_0 = 6.5291e-04
Loss = 4.6486e-01, PNorm = 52.0875, GNorm = 2.1634, lr_0 = 6.5246e-04
Loss = 5.1767e-01, PNorm = 52.1003, GNorm = 1.2597, lr_0 = 6.5201e-04
Loss = 4.2033e-01, PNorm = 52.1153, GNorm = 1.1862, lr_0 = 6.5157e-04
Loss = 4.6057e-01, PNorm = 52.1301, GNorm = 1.1304, lr_0 = 6.5112e-04
Loss = 4.4872e-01, PNorm = 52.1416, GNorm = 1.5435, lr_0 = 6.5067e-04
Loss = 4.8023e-01, PNorm = 52.1527, GNorm = 1.4617, lr_0 = 6.5023e-04
Loss = 4.1619e-01, PNorm = 52.1702, GNorm = 1.0895, lr_0 = 6.4978e-04
Loss = 4.8373e-01, PNorm = 52.1821, GNorm = 2.0214, lr_0 = 6.4934e-04
Loss = 5.3049e-01, PNorm = 52.1978, GNorm = 1.2751, lr_0 = 6.4889e-04
Loss = 4.5727e-01, PNorm = 52.2116, GNorm = 1.7494, lr_0 = 6.4845e-04
Loss = 4.9394e-01, PNorm = 52.2384, GNorm = 1.4363, lr_0 = 6.4800e-04
Loss = 4.8808e-01, PNorm = 52.2603, GNorm = 1.5364, lr_0 = 6.4756e-04
Loss = 4.8674e-01, PNorm = 52.2677, GNorm = 2.0499, lr_0 = 6.4712e-04
Loss = 4.3401e-01, PNorm = 52.2780, GNorm = 1.1140, lr_0 = 6.4667e-04
Loss = 4.2510e-01, PNorm = 52.2913, GNorm = 1.3200, lr_0 = 6.4623e-04
Loss = 4.5704e-01, PNorm = 52.3078, GNorm = 1.3192, lr_0 = 6.4579e-04
Loss = 4.4934e-01, PNorm = 52.3248, GNorm = 1.3818, lr_0 = 6.4534e-04
Loss = 3.9306e-01, PNorm = 52.3411, GNorm = 1.3750, lr_0 = 6.4490e-04
Loss = 4.5496e-01, PNorm = 52.3575, GNorm = 1.9576, lr_0 = 6.4446e-04
Loss = 4.1608e-01, PNorm = 52.3727, GNorm = 1.6249, lr_0 = 6.4402e-04
Loss = 4.0946e-01, PNorm = 52.3850, GNorm = 1.4594, lr_0 = 6.4358e-04
Loss = 4.9447e-01, PNorm = 52.4043, GNorm = 1.7787, lr_0 = 6.4314e-04
Loss = 4.8291e-01, PNorm = 52.4168, GNorm = 1.7120, lr_0 = 6.4270e-04
Loss = 4.7078e-01, PNorm = 52.4274, GNorm = 2.0850, lr_0 = 6.4226e-04
Loss = 4.5296e-01, PNorm = 52.4400, GNorm = 2.0537, lr_0 = 6.4182e-04
Loss = 4.1248e-01, PNorm = 52.4534, GNorm = 1.0830, lr_0 = 6.4138e-04
Loss = 4.7822e-01, PNorm = 52.4699, GNorm = 1.5032, lr_0 = 6.4094e-04
Loss = 4.0807e-01, PNorm = 52.4807, GNorm = 0.9865, lr_0 = 6.4050e-04
Loss = 4.7646e-01, PNorm = 52.4904, GNorm = 1.4239, lr_0 = 6.4006e-04
Loss = 4.1667e-01, PNorm = 52.5062, GNorm = 1.3860, lr_0 = 6.3962e-04
Loss = 4.8217e-01, PNorm = 52.5258, GNorm = 1.3374, lr_0 = 6.3918e-04
Loss = 4.6264e-01, PNorm = 52.5363, GNorm = 1.2305, lr_0 = 6.3874e-04
Loss = 4.1197e-01, PNorm = 52.5425, GNorm = 1.5053, lr_0 = 6.3831e-04
Loss = 3.7575e-01, PNorm = 52.5483, GNorm = 0.9507, lr_0 = 6.3787e-04
Loss = 4.9419e-01, PNorm = 52.5633, GNorm = 1.1437, lr_0 = 6.3743e-04
Loss = 4.4429e-01, PNorm = 52.5810, GNorm = 1.4431, lr_0 = 6.3700e-04
Loss = 4.8353e-01, PNorm = 52.5997, GNorm = 3.0251, lr_0 = 6.3656e-04
Loss = 4.1925e-01, PNorm = 52.6106, GNorm = 2.1549, lr_0 = 6.3612e-04
Loss = 4.6873e-01, PNorm = 52.6270, GNorm = 1.7123, lr_0 = 6.3569e-04
Loss = 4.7628e-01, PNorm = 52.6501, GNorm = 1.3218, lr_0 = 6.3525e-04
Loss = 3.7297e-01, PNorm = 52.6665, GNorm = 2.0569, lr_0 = 6.3482e-04
Loss = 4.2310e-01, PNorm = 52.6799, GNorm = 1.2883, lr_0 = 6.3438e-04
Loss = 4.2134e-01, PNorm = 52.6925, GNorm = 1.4002, lr_0 = 6.3395e-04
Loss = 4.2027e-01, PNorm = 52.7054, GNorm = 1.4878, lr_0 = 6.3351e-04
Loss = 4.1670e-01, PNorm = 52.7177, GNorm = 1.2159, lr_0 = 6.3308e-04
Loss = 4.2348e-01, PNorm = 52.7305, GNorm = 1.7535, lr_0 = 6.3265e-04
Loss = 5.2680e-01, PNorm = 52.7471, GNorm = 1.1911, lr_0 = 6.3221e-04
Loss = 4.0173e-01, PNorm = 52.7619, GNorm = 0.8734, lr_0 = 6.3178e-04
Loss = 4.5059e-01, PNorm = 52.7757, GNorm = 1.7189, lr_0 = 6.3135e-04
Loss = 4.3556e-01, PNorm = 52.8001, GNorm = 1.4754, lr_0 = 6.3091e-04
Loss = 4.2820e-01, PNorm = 52.8060, GNorm = 1.4456, lr_0 = 6.3048e-04
Loss = 4.5128e-01, PNorm = 52.8154, GNorm = 1.2397, lr_0 = 6.3005e-04
Loss = 4.4600e-01, PNorm = 52.8350, GNorm = 2.0444, lr_0 = 6.2962e-04
Loss = 3.9386e-01, PNorm = 52.8475, GNorm = 1.8638, lr_0 = 6.2919e-04
Loss = 4.7598e-01, PNorm = 52.8589, GNorm = 2.1430, lr_0 = 6.2876e-04
Loss = 4.6514e-01, PNorm = 52.8746, GNorm = 1.2819, lr_0 = 6.2833e-04
Loss = 4.2763e-01, PNorm = 52.8906, GNorm = 1.1592, lr_0 = 6.2789e-04
Loss = 4.3066e-01, PNorm = 52.9064, GNorm = 1.1367, lr_0 = 6.2746e-04
Loss = 4.0400e-01, PNorm = 52.9157, GNorm = 1.5408, lr_0 = 6.2703e-04
Loss = 4.1348e-01, PNorm = 52.9224, GNorm = 1.3091, lr_0 = 6.2661e-04
Loss = 3.7903e-01, PNorm = 52.9374, GNorm = 1.4184, lr_0 = 6.2618e-04
Loss = 4.3459e-01, PNorm = 52.9435, GNorm = 1.0179, lr_0 = 6.2575e-04
Loss = 4.5257e-01, PNorm = 52.9553, GNorm = 1.2442, lr_0 = 6.2532e-04
Loss = 4.2118e-01, PNorm = 52.9760, GNorm = 0.9659, lr_0 = 6.2489e-04
Loss = 4.4891e-01, PNorm = 52.9883, GNorm = 1.1562, lr_0 = 6.2446e-04
Loss = 4.6501e-01, PNorm = 52.9960, GNorm = 2.1270, lr_0 = 6.2403e-04
Loss = 4.0817e-01, PNorm = 53.0048, GNorm = 1.6851, lr_0 = 6.2361e-04
Loss = 4.1628e-01, PNorm = 53.0192, GNorm = 1.1112, lr_0 = 6.2318e-04
Loss = 4.7595e-01, PNorm = 53.0198, GNorm = 1.1730, lr_0 = 6.2275e-04
Loss = 3.9375e-01, PNorm = 53.0316, GNorm = 1.1768, lr_0 = 6.2233e-04
Loss = 3.9570e-01, PNorm = 53.0430, GNorm = 1.6353, lr_0 = 6.2190e-04
Loss = 4.0444e-01, PNorm = 53.0527, GNorm = 2.0308, lr_0 = 6.2147e-04
Loss = 4.6949e-01, PNorm = 53.0603, GNorm = 1.6063, lr_0 = 6.2105e-04
Loss = 4.7060e-01, PNorm = 53.0768, GNorm = 1.0503, lr_0 = 6.2062e-04
Loss = 4.7176e-01, PNorm = 53.0897, GNorm = 1.4054, lr_0 = 6.2020e-04
Loss = 4.0191e-01, PNorm = 53.1013, GNorm = 1.3634, lr_0 = 6.1977e-04
Loss = 4.1403e-01, PNorm = 53.1057, GNorm = 0.9404, lr_0 = 6.1935e-04
Loss = 4.7314e-01, PNorm = 53.1163, GNorm = 1.3412, lr_0 = 6.1892e-04
Loss = 4.2954e-01, PNorm = 53.1338, GNorm = 0.7783, lr_0 = 6.1850e-04
Loss = 4.5441e-01, PNorm = 53.1449, GNorm = 1.8838, lr_0 = 6.1808e-04
Loss = 4.0326e-01, PNorm = 53.1574, GNorm = 1.4137, lr_0 = 6.1765e-04
Loss = 4.2154e-01, PNorm = 53.1731, GNorm = 1.3185, lr_0 = 6.1723e-04
Loss = 4.5445e-01, PNorm = 53.1882, GNorm = 1.4757, lr_0 = 6.1681e-04
Loss = 4.7712e-01, PNorm = 53.2042, GNorm = 1.1398, lr_0 = 6.1638e-04
Loss = 4.6793e-01, PNorm = 53.2234, GNorm = 3.5809, lr_0 = 6.1596e-04
Loss = 4.0260e-01, PNorm = 53.2411, GNorm = 0.9215, lr_0 = 6.1554e-04
Loss = 4.3944e-01, PNorm = 53.2548, GNorm = 1.4394, lr_0 = 6.1512e-04
Loss = 4.1472e-01, PNorm = 53.2658, GNorm = 1.3707, lr_0 = 6.1470e-04
Loss = 5.0696e-01, PNorm = 53.2771, GNorm = 1.0662, lr_0 = 6.1428e-04
Loss = 3.9434e-01, PNorm = 53.2961, GNorm = 1.0676, lr_0 = 6.1385e-04
Loss = 4.0903e-01, PNorm = 53.3120, GNorm = 1.3736, lr_0 = 6.1343e-04
Loss = 4.1979e-01, PNorm = 53.3274, GNorm = 0.9790, lr_0 = 6.1301e-04
Loss = 4.4909e-01, PNorm = 53.3407, GNorm = 1.7074, lr_0 = 6.1259e-04
Loss = 4.4059e-01, PNorm = 53.3493, GNorm = 0.9045, lr_0 = 6.1217e-04
Loss = 5.1089e-01, PNorm = 53.3619, GNorm = 1.5992, lr_0 = 6.1175e-04
Loss = 4.4728e-01, PNorm = 53.3762, GNorm = 1.9120, lr_0 = 6.1134e-04
Loss = 4.5888e-01, PNorm = 53.3867, GNorm = 1.7396, lr_0 = 6.1092e-04
Loss = 4.5049e-01, PNorm = 53.3955, GNorm = 1.5163, lr_0 = 6.1050e-04
Validation mae = 0.118357
Epoch 8
Loss = 3.5909e-01, PNorm = 53.4074, GNorm = 0.9511, lr_0 = 6.1008e-04
Loss = 4.3542e-01, PNorm = 53.4236, GNorm = 1.3386, lr_0 = 6.0966e-04
Loss = 4.1224e-01, PNorm = 53.4363, GNorm = 1.5668, lr_0 = 6.0924e-04
Loss = 4.5644e-01, PNorm = 53.4487, GNorm = 0.9461, lr_0 = 6.0883e-04
Loss = 4.5572e-01, PNorm = 53.4600, GNorm = 0.9737, lr_0 = 6.0841e-04
Loss = 4.5975e-01, PNorm = 53.4765, GNorm = 1.6888, lr_0 = 6.0799e-04
Loss = 4.2197e-01, PNorm = 53.4910, GNorm = 1.9248, lr_0 = 6.0758e-04
Loss = 4.4833e-01, PNorm = 53.5027, GNorm = 1.1314, lr_0 = 6.0716e-04
Loss = 3.9969e-01, PNorm = 53.5125, GNorm = 1.1810, lr_0 = 6.0674e-04
Loss = 3.7760e-01, PNorm = 53.5193, GNorm = 1.2362, lr_0 = 6.0633e-04
Loss = 5.0776e-01, PNorm = 53.5325, GNorm = 1.5866, lr_0 = 6.0591e-04
Loss = 4.5397e-01, PNorm = 53.5423, GNorm = 1.0914, lr_0 = 6.0550e-04
Loss = 4.5999e-01, PNorm = 53.5583, GNorm = 1.2479, lr_0 = 6.0508e-04
Loss = 4.7816e-01, PNorm = 53.5718, GNorm = 1.8446, lr_0 = 6.0467e-04
Loss = 4.3236e-01, PNorm = 53.5853, GNorm = 1.2715, lr_0 = 6.0425e-04
Loss = 3.7738e-01, PNorm = 53.5964, GNorm = 1.1983, lr_0 = 6.0384e-04
Loss = 4.3965e-01, PNorm = 53.6085, GNorm = 1.5631, lr_0 = 6.0343e-04
Loss = 4.7421e-01, PNorm = 53.6163, GNorm = 1.4849, lr_0 = 6.0301e-04
Loss = 4.2072e-01, PNorm = 53.6285, GNorm = 1.7474, lr_0 = 6.0260e-04
Loss = 4.3044e-01, PNorm = 53.6422, GNorm = 1.1771, lr_0 = 6.0219e-04
Loss = 4.6100e-01, PNorm = 53.6549, GNorm = 1.0397, lr_0 = 6.0178e-04
Loss = 4.7743e-01, PNorm = 53.6666, GNorm = 1.4010, lr_0 = 6.0136e-04
Loss = 4.4743e-01, PNorm = 53.6815, GNorm = 0.9440, lr_0 = 6.0095e-04
Loss = 4.5338e-01, PNorm = 53.6957, GNorm = 1.3798, lr_0 = 6.0054e-04
Loss = 4.0866e-01, PNorm = 53.7113, GNorm = 0.7619, lr_0 = 6.0013e-04
Loss = 3.9317e-01, PNorm = 53.7250, GNorm = 1.5021, lr_0 = 5.9972e-04
Loss = 4.5495e-01, PNorm = 53.7373, GNorm = 1.3918, lr_0 = 5.9931e-04
Loss = 4.7034e-01, PNorm = 53.7498, GNorm = 1.5590, lr_0 = 5.9890e-04
Loss = 4.2800e-01, PNorm = 53.7579, GNorm = 1.9327, lr_0 = 5.9849e-04
Loss = 4.4254e-01, PNorm = 53.7720, GNorm = 1.2546, lr_0 = 5.9808e-04
Loss = 4.1872e-01, PNorm = 53.7849, GNorm = 1.3235, lr_0 = 5.9767e-04
Loss = 4.3742e-01, PNorm = 53.7941, GNorm = 1.4006, lr_0 = 5.9726e-04
Loss = 4.2537e-01, PNorm = 53.8046, GNorm = 1.0637, lr_0 = 5.9685e-04
Loss = 4.2018e-01, PNorm = 53.8120, GNorm = 1.0601, lr_0 = 5.9644e-04
Loss = 4.3312e-01, PNorm = 53.8280, GNorm = 2.1476, lr_0 = 5.9603e-04
Loss = 4.8219e-01, PNorm = 53.8413, GNorm = 1.5055, lr_0 = 5.9562e-04
Loss = 4.2557e-01, PNorm = 53.8519, GNorm = 1.2479, lr_0 = 5.9521e-04
Loss = 3.8205e-01, PNorm = 53.8594, GNorm = 1.9092, lr_0 = 5.9481e-04
Loss = 3.8794e-01, PNorm = 53.8755, GNorm = 1.5731, lr_0 = 5.9440e-04
Loss = 4.7317e-01, PNorm = 53.8862, GNorm = 2.6032, lr_0 = 5.9399e-04
Loss = 4.1019e-01, PNorm = 53.9020, GNorm = 1.6018, lr_0 = 5.9358e-04
Loss = 5.8324e-01, PNorm = 53.9174, GNorm = 1.2902, lr_0 = 5.9318e-04
Loss = 5.5291e-01, PNorm = 53.9303, GNorm = 1.4849, lr_0 = 5.9277e-04
Loss = 4.5632e-01, PNorm = 53.9473, GNorm = 1.7914, lr_0 = 5.9236e-04
Loss = 4.0874e-01, PNorm = 53.9606, GNorm = 1.1850, lr_0 = 5.9196e-04
Loss = 4.4885e-01, PNorm = 53.9715, GNorm = 0.9010, lr_0 = 5.9155e-04
Loss = 4.3295e-01, PNorm = 53.9858, GNorm = 2.1411, lr_0 = 5.9115e-04
Loss = 3.9712e-01, PNorm = 53.9993, GNorm = 1.3306, lr_0 = 5.9074e-04
Loss = 4.0562e-01, PNorm = 54.0129, GNorm = 1.3616, lr_0 = 5.9034e-04
Loss = 3.5957e-01, PNorm = 54.0239, GNorm = 1.1513, lr_0 = 5.8993e-04
Loss = 4.1340e-01, PNorm = 54.0348, GNorm = 2.6929, lr_0 = 5.8953e-04
Loss = 4.7079e-01, PNorm = 54.0457, GNorm = 1.5376, lr_0 = 5.8913e-04
Loss = 4.1272e-01, PNorm = 54.0584, GNorm = 1.1239, lr_0 = 5.8872e-04
Loss = 4.3745e-01, PNorm = 54.0613, GNorm = 1.3151, lr_0 = 5.8832e-04
Loss = 4.6252e-01, PNorm = 54.0765, GNorm = 1.4383, lr_0 = 5.8792e-04
Loss = 4.7645e-01, PNorm = 54.0886, GNorm = 1.3125, lr_0 = 5.8751e-04
Loss = 4.4003e-01, PNorm = 54.1008, GNorm = 1.6739, lr_0 = 5.8711e-04
Loss = 4.1820e-01, PNorm = 54.1165, GNorm = 1.3631, lr_0 = 5.8671e-04
Loss = 3.9371e-01, PNorm = 54.1321, GNorm = 1.4630, lr_0 = 5.8631e-04
Loss = 3.4148e-01, PNorm = 54.1421, GNorm = 1.0171, lr_0 = 5.8591e-04
Loss = 4.1250e-01, PNorm = 54.1491, GNorm = 2.6501, lr_0 = 5.8550e-04
Loss = 3.9185e-01, PNorm = 54.1564, GNorm = 1.5108, lr_0 = 5.8510e-04
Loss = 4.2231e-01, PNorm = 54.1650, GNorm = 1.1103, lr_0 = 5.8470e-04
Loss = 4.2271e-01, PNorm = 54.1781, GNorm = 1.2191, lr_0 = 5.8430e-04
Loss = 4.0751e-01, PNorm = 54.1875, GNorm = 1.0859, lr_0 = 5.8390e-04
Loss = 4.1867e-01, PNorm = 54.1985, GNorm = 1.7088, lr_0 = 5.8350e-04
Loss = 4.6311e-01, PNorm = 54.2119, GNorm = 1.7676, lr_0 = 5.8310e-04
Loss = 4.1336e-01, PNorm = 54.2338, GNorm = 1.1870, lr_0 = 5.8270e-04
Loss = 4.5877e-01, PNorm = 54.2420, GNorm = 1.1376, lr_0 = 5.8230e-04
Loss = 3.8766e-01, PNorm = 54.2454, GNorm = 1.2503, lr_0 = 5.8190e-04
Loss = 4.0962e-01, PNorm = 54.2560, GNorm = 1.8293, lr_0 = 5.8151e-04
Loss = 4.7228e-01, PNorm = 54.2637, GNorm = 1.1558, lr_0 = 5.8111e-04
Loss = 4.0363e-01, PNorm = 54.2716, GNorm = 1.1404, lr_0 = 5.8071e-04
Loss = 3.8799e-01, PNorm = 54.2861, GNorm = 1.1928, lr_0 = 5.8031e-04
Loss = 4.3749e-01, PNorm = 54.3048, GNorm = 1.1056, lr_0 = 5.7991e-04
Loss = 4.6923e-01, PNorm = 54.3186, GNorm = 1.1360, lr_0 = 5.7952e-04
Loss = 4.8199e-01, PNorm = 54.3361, GNorm = 1.0880, lr_0 = 5.7912e-04
Loss = 4.5203e-01, PNorm = 54.3451, GNorm = 1.3557, lr_0 = 5.7872e-04
Loss = 4.0908e-01, PNorm = 54.3557, GNorm = 1.2600, lr_0 = 5.7833e-04
Loss = 4.2583e-01, PNorm = 54.3696, GNorm = 1.1862, lr_0 = 5.7793e-04
Loss = 4.7812e-01, PNorm = 54.3848, GNorm = 1.5742, lr_0 = 5.7753e-04
Loss = 4.2301e-01, PNorm = 54.4004, GNorm = 2.0548, lr_0 = 5.7714e-04
Loss = 4.2522e-01, PNorm = 54.4069, GNorm = 1.3562, lr_0 = 5.7674e-04
Loss = 3.7080e-01, PNorm = 54.4168, GNorm = 1.3395, lr_0 = 5.7635e-04
Loss = 4.3825e-01, PNorm = 54.4282, GNorm = 1.7599, lr_0 = 5.7595e-04
Loss = 3.5581e-01, PNorm = 54.4340, GNorm = 1.1868, lr_0 = 5.7556e-04
Loss = 4.0088e-01, PNorm = 54.4445, GNorm = 1.1929, lr_0 = 5.7516e-04
Loss = 4.0513e-01, PNorm = 54.4557, GNorm = 1.5364, lr_0 = 5.7477e-04
Loss = 3.8241e-01, PNorm = 54.4664, GNorm = 1.4516, lr_0 = 5.7438e-04
Loss = 4.4745e-01, PNorm = 54.4784, GNorm = 1.0209, lr_0 = 5.7398e-04
Loss = 3.8844e-01, PNorm = 54.4925, GNorm = 1.7561, lr_0 = 5.7359e-04
Loss = 3.7516e-01, PNorm = 54.5037, GNorm = 1.3319, lr_0 = 5.7320e-04
Loss = 4.9028e-01, PNorm = 54.5076, GNorm = 1.6416, lr_0 = 5.7280e-04
Loss = 4.6064e-01, PNorm = 54.5268, GNorm = 1.3242, lr_0 = 5.7241e-04
Loss = 4.4072e-01, PNorm = 54.5366, GNorm = 1.6332, lr_0 = 5.7202e-04
Loss = 4.2896e-01, PNorm = 54.5499, GNorm = 1.3693, lr_0 = 5.7163e-04
Loss = 4.1365e-01, PNorm = 54.5588, GNorm = 1.0783, lr_0 = 5.7124e-04
Loss = 4.3206e-01, PNorm = 54.5689, GNorm = 1.5654, lr_0 = 5.7084e-04
Loss = 4.1491e-01, PNorm = 54.5816, GNorm = 1.3209, lr_0 = 5.7045e-04
Loss = 4.4531e-01, PNorm = 54.5939, GNorm = 0.9883, lr_0 = 5.7006e-04
Loss = 4.6650e-01, PNorm = 54.6053, GNorm = 1.4843, lr_0 = 5.6967e-04
Loss = 4.5416e-01, PNorm = 54.6186, GNorm = 1.7698, lr_0 = 5.6928e-04
Loss = 4.0158e-01, PNorm = 54.6320, GNorm = 0.9047, lr_0 = 5.6889e-04
Loss = 4.3273e-01, PNorm = 54.6408, GNorm = 1.3990, lr_0 = 5.6850e-04
Loss = 3.9892e-01, PNorm = 54.6456, GNorm = 1.3046, lr_0 = 5.6811e-04
Loss = 3.3245e-01, PNorm = 54.6516, GNorm = 1.0026, lr_0 = 5.6772e-04
Loss = 4.5043e-01, PNorm = 54.6540, GNorm = 1.0994, lr_0 = 5.6733e-04
Loss = 4.4365e-01, PNorm = 54.6668, GNorm = 1.0899, lr_0 = 5.6695e-04
Loss = 4.4659e-01, PNorm = 54.6821, GNorm = 1.2297, lr_0 = 5.6656e-04
Loss = 4.0208e-01, PNorm = 54.6958, GNorm = 1.5282, lr_0 = 5.6617e-04
Loss = 5.0850e-01, PNorm = 54.7000, GNorm = 3.0907, lr_0 = 5.6578e-04
Loss = 4.1212e-01, PNorm = 54.7119, GNorm = 1.1135, lr_0 = 5.6539e-04
Loss = 4.3172e-01, PNorm = 54.7261, GNorm = 1.3828, lr_0 = 5.6501e-04
Loss = 4.6529e-01, PNorm = 54.7371, GNorm = 1.8500, lr_0 = 5.6462e-04
Loss = 4.2579e-01, PNorm = 54.7450, GNorm = 2.1906, lr_0 = 5.6423e-04
Loss = 4.3165e-01, PNorm = 54.7611, GNorm = 1.3675, lr_0 = 5.6385e-04
Loss = 4.6513e-01, PNorm = 54.7713, GNorm = 1.5646, lr_0 = 5.6346e-04
Loss = 4.3472e-01, PNorm = 54.7834, GNorm = 1.9037, lr_0 = 5.6307e-04
Loss = 4.5349e-01, PNorm = 54.7866, GNorm = 1.1721, lr_0 = 5.6269e-04
Loss = 3.9679e-01, PNorm = 54.8016, GNorm = 1.4863, lr_0 = 5.6230e-04
Validation mae = 0.116791
Epoch 9
Loss = 3.6763e-01, PNorm = 54.8142, GNorm = 1.1737, lr_0 = 5.6192e-04
Loss = 4.3685e-01, PNorm = 54.8318, GNorm = 1.6448, lr_0 = 5.6153e-04
Loss = 4.2554e-01, PNorm = 54.8417, GNorm = 1.5771, lr_0 = 5.6115e-04
Loss = 4.0461e-01, PNorm = 54.8514, GNorm = 2.5205, lr_0 = 5.6076e-04
Loss = 4.9661e-01, PNorm = 54.8553, GNorm = 2.1831, lr_0 = 5.6038e-04
Loss = 4.4211e-01, PNorm = 54.8709, GNorm = 1.3340, lr_0 = 5.6000e-04
Loss = 4.1721e-01, PNorm = 54.8855, GNorm = 1.6112, lr_0 = 5.5961e-04
Loss = 4.5131e-01, PNorm = 54.8999, GNorm = 1.4685, lr_0 = 5.5923e-04
Loss = 3.9184e-01, PNorm = 54.9111, GNorm = 0.9893, lr_0 = 5.5885e-04
Loss = 3.7676e-01, PNorm = 54.9257, GNorm = 1.6038, lr_0 = 5.5846e-04
Loss = 4.4307e-01, PNorm = 54.9285, GNorm = 1.3275, lr_0 = 5.5808e-04
Loss = 4.0179e-01, PNorm = 54.9363, GNorm = 1.2790, lr_0 = 5.5770e-04
Loss = 4.3180e-01, PNorm = 54.9478, GNorm = 1.6770, lr_0 = 5.5732e-04
Loss = 3.4553e-01, PNorm = 54.9607, GNorm = 1.3886, lr_0 = 5.5693e-04
Loss = 4.1687e-01, PNorm = 54.9674, GNorm = 2.1504, lr_0 = 5.5655e-04
Loss = 4.1432e-01, PNorm = 54.9826, GNorm = 1.6676, lr_0 = 5.5617e-04
Loss = 4.3859e-01, PNorm = 54.9941, GNorm = 1.0524, lr_0 = 5.5579e-04
Loss = 4.1696e-01, PNorm = 55.0092, GNorm = 1.4906, lr_0 = 5.5541e-04
Loss = 4.1497e-01, PNorm = 55.0213, GNorm = 1.8088, lr_0 = 5.5503e-04
Loss = 4.1011e-01, PNorm = 55.0356, GNorm = 1.3899, lr_0 = 5.5465e-04
Loss = 4.8677e-01, PNorm = 55.0459, GNorm = 1.0608, lr_0 = 5.5427e-04
Loss = 4.1559e-01, PNorm = 55.0544, GNorm = 1.1472, lr_0 = 5.5389e-04
Loss = 4.4720e-01, PNorm = 55.0637, GNorm = 1.8820, lr_0 = 5.5351e-04
Loss = 4.2633e-01, PNorm = 55.0769, GNorm = 1.6864, lr_0 = 5.5313e-04
Loss = 3.9268e-01, PNorm = 55.0847, GNorm = 1.5238, lr_0 = 5.5275e-04
Loss = 4.3174e-01, PNorm = 55.1012, GNorm = 1.8300, lr_0 = 5.5237e-04
Loss = 4.3945e-01, PNorm = 55.1167, GNorm = 1.3562, lr_0 = 5.5199e-04
Loss = 4.5039e-01, PNorm = 55.1303, GNorm = 2.3402, lr_0 = 5.5162e-04
Loss = 3.8369e-01, PNorm = 55.1379, GNorm = 1.1237, lr_0 = 5.5124e-04
Loss = 3.8904e-01, PNorm = 55.1425, GNorm = 1.5359, lr_0 = 5.5086e-04
Loss = 3.7992e-01, PNorm = 55.1469, GNorm = 1.1777, lr_0 = 5.5048e-04
Loss = 4.5790e-01, PNorm = 55.1512, GNorm = 1.9229, lr_0 = 5.5011e-04
Loss = 4.1853e-01, PNorm = 55.1589, GNorm = 1.3284, lr_0 = 5.4973e-04
Loss = 4.5689e-01, PNorm = 55.1679, GNorm = 1.3358, lr_0 = 5.4935e-04
Loss = 4.9374e-01, PNorm = 55.1815, GNorm = 1.8478, lr_0 = 5.4898e-04
Loss = 4.1824e-01, PNorm = 55.1881, GNorm = 2.0281, lr_0 = 5.4860e-04
Loss = 4.3501e-01, PNorm = 55.1968, GNorm = 1.2304, lr_0 = 5.4822e-04
Loss = 3.7861e-01, PNorm = 55.2067, GNorm = 2.0120, lr_0 = 5.4785e-04
Loss = 4.5969e-01, PNorm = 55.2161, GNorm = 1.5694, lr_0 = 5.4747e-04
Loss = 4.1682e-01, PNorm = 55.2267, GNorm = 4.9338, lr_0 = 5.4710e-04
Loss = 4.6256e-01, PNorm = 55.2350, GNorm = 1.3961, lr_0 = 5.4672e-04
Loss = 4.5040e-01, PNorm = 55.2488, GNorm = 1.9204, lr_0 = 5.4635e-04
Loss = 4.8234e-01, PNorm = 55.2537, GNorm = 1.9377, lr_0 = 5.4597e-04
Loss = 3.9469e-01, PNorm = 55.2685, GNorm = 0.8987, lr_0 = 5.4560e-04
Loss = 4.3580e-01, PNorm = 55.2798, GNorm = 1.7266, lr_0 = 5.4523e-04
Loss = 4.4927e-01, PNorm = 55.2916, GNorm = 1.4906, lr_0 = 5.4485e-04
Loss = 4.3871e-01, PNorm = 55.3029, GNorm = 1.7944, lr_0 = 5.4448e-04
Loss = 4.1220e-01, PNorm = 55.3221, GNorm = 1.1743, lr_0 = 5.4411e-04
Loss = 4.2499e-01, PNorm = 55.3369, GNorm = 1.0279, lr_0 = 5.4373e-04
Loss = 4.3576e-01, PNorm = 55.3421, GNorm = 1.1886, lr_0 = 5.4336e-04
Loss = 3.9315e-01, PNorm = 55.3529, GNorm = 1.1369, lr_0 = 5.4299e-04
Loss = 4.0099e-01, PNorm = 55.3688, GNorm = 2.4050, lr_0 = 5.4262e-04
Loss = 4.1117e-01, PNorm = 55.3822, GNorm = 0.9232, lr_0 = 5.4225e-04
Loss = 3.9618e-01, PNorm = 55.3960, GNorm = 2.0169, lr_0 = 5.4187e-04
Loss = 4.7492e-01, PNorm = 55.4120, GNorm = 1.6972, lr_0 = 5.4150e-04
Loss = 4.5986e-01, PNorm = 55.4229, GNorm = 1.2648, lr_0 = 5.4113e-04
Loss = 4.1294e-01, PNorm = 55.4364, GNorm = 1.3878, lr_0 = 5.4076e-04
Loss = 3.7289e-01, PNorm = 55.4450, GNorm = 0.9716, lr_0 = 5.4039e-04
Loss = 4.4359e-01, PNorm = 55.4556, GNorm = 1.3795, lr_0 = 5.4002e-04
Loss = 4.3628e-01, PNorm = 55.4619, GNorm = 1.8146, lr_0 = 5.3965e-04
Loss = 4.2521e-01, PNorm = 55.4712, GNorm = 1.3090, lr_0 = 5.3928e-04
Loss = 3.4847e-01, PNorm = 55.4820, GNorm = 0.8554, lr_0 = 5.3891e-04
Loss = 4.1776e-01, PNorm = 55.4922, GNorm = 1.0386, lr_0 = 5.3854e-04
Loss = 4.5235e-01, PNorm = 55.4995, GNorm = 1.5963, lr_0 = 5.3817e-04
Loss = 4.7347e-01, PNorm = 55.5108, GNorm = 1.5222, lr_0 = 5.3781e-04
Loss = 3.8782e-01, PNorm = 55.5197, GNorm = 1.3959, lr_0 = 5.3744e-04
Loss = 3.8697e-01, PNorm = 55.5380, GNorm = 1.0936, lr_0 = 5.3707e-04
Loss = 4.1148e-01, PNorm = 55.5441, GNorm = 1.1705, lr_0 = 5.3670e-04
Loss = 4.4972e-01, PNorm = 55.5582, GNorm = 1.2370, lr_0 = 5.3633e-04
Loss = 4.5386e-01, PNorm = 55.5663, GNorm = 1.4185, lr_0 = 5.3597e-04
Loss = 4.0525e-01, PNorm = 55.5759, GNorm = 1.2296, lr_0 = 5.3560e-04
Loss = 4.2002e-01, PNorm = 55.5881, GNorm = 1.4052, lr_0 = 5.3523e-04
Loss = 4.2025e-01, PNorm = 55.6026, GNorm = 1.9285, lr_0 = 5.3486e-04
Loss = 4.2731e-01, PNorm = 55.6116, GNorm = 1.4048, lr_0 = 5.3450e-04
Loss = 3.9251e-01, PNorm = 55.6167, GNorm = 1.1611, lr_0 = 5.3413e-04
Loss = 4.0891e-01, PNorm = 55.6242, GNorm = 1.2032, lr_0 = 5.3377e-04
Loss = 4.6314e-01, PNorm = 55.6381, GNorm = 1.1024, lr_0 = 5.3340e-04
Loss = 4.4569e-01, PNorm = 55.6503, GNorm = 1.2466, lr_0 = 5.3304e-04
Loss = 3.8700e-01, PNorm = 55.6587, GNorm = 1.2797, lr_0 = 5.3267e-04
Loss = 4.3081e-01, PNorm = 55.6721, GNorm = 1.1227, lr_0 = 5.3231e-04
Loss = 4.2832e-01, PNorm = 55.6780, GNorm = 1.2197, lr_0 = 5.3194e-04
Loss = 3.8002e-01, PNorm = 55.6878, GNorm = 1.0942, lr_0 = 5.3158e-04
Loss = 4.3337e-01, PNorm = 55.6993, GNorm = 1.1570, lr_0 = 5.3121e-04
Loss = 4.2073e-01, PNorm = 55.7122, GNorm = 1.3013, lr_0 = 5.3085e-04
Loss = 4.5750e-01, PNorm = 55.7169, GNorm = 1.0503, lr_0 = 5.3048e-04
Loss = 4.5665e-01, PNorm = 55.7246, GNorm = 0.9209, lr_0 = 5.3012e-04
Loss = 4.2048e-01, PNorm = 55.7316, GNorm = 1.0276, lr_0 = 5.2976e-04
Loss = 4.5302e-01, PNorm = 55.7405, GNorm = 1.3422, lr_0 = 5.2939e-04
Loss = 3.6982e-01, PNorm = 55.7559, GNorm = 1.2208, lr_0 = 5.2903e-04
Loss = 4.7664e-01, PNorm = 55.7656, GNorm = 1.8565, lr_0 = 5.2867e-04
Loss = 4.4838e-01, PNorm = 55.7844, GNorm = 1.4585, lr_0 = 5.2831e-04
Loss = 4.0233e-01, PNorm = 55.7976, GNorm = 1.2773, lr_0 = 5.2795e-04
Loss = 4.1337e-01, PNorm = 55.8103, GNorm = 1.4520, lr_0 = 5.2758e-04
Loss = 4.6347e-01, PNorm = 55.8229, GNorm = 1.6186, lr_0 = 5.2722e-04
Loss = 4.1087e-01, PNorm = 55.8301, GNorm = 1.2805, lr_0 = 5.2686e-04
Loss = 3.6467e-01, PNorm = 55.8363, GNorm = 1.1058, lr_0 = 5.2650e-04
Loss = 4.5534e-01, PNorm = 55.8480, GNorm = 1.8863, lr_0 = 5.2614e-04
Loss = 4.2921e-01, PNorm = 55.8549, GNorm = 1.3313, lr_0 = 5.2578e-04
Loss = 4.0977e-01, PNorm = 55.8586, GNorm = 1.4734, lr_0 = 5.2542e-04
Loss = 3.9208e-01, PNorm = 55.8725, GNorm = 1.3343, lr_0 = 5.2506e-04
Loss = 4.1591e-01, PNorm = 55.8852, GNorm = 1.1823, lr_0 = 5.2470e-04
Loss = 4.3013e-01, PNorm = 55.8973, GNorm = 1.3962, lr_0 = 5.2434e-04
Loss = 4.0865e-01, PNorm = 55.9045, GNorm = 1.4000, lr_0 = 5.2398e-04
Loss = 3.9757e-01, PNorm = 55.9176, GNorm = 1.1029, lr_0 = 5.2362e-04
Loss = 3.8084e-01, PNorm = 55.9280, GNorm = 1.4223, lr_0 = 5.2326e-04
Loss = 4.0340e-01, PNorm = 55.9382, GNorm = 1.0248, lr_0 = 5.2290e-04
Loss = 4.5244e-01, PNorm = 55.9421, GNorm = 1.5129, lr_0 = 5.2255e-04
Loss = 4.2887e-01, PNorm = 55.9467, GNorm = 1.0035, lr_0 = 5.2219e-04
Loss = 4.3954e-01, PNorm = 55.9555, GNorm = 1.4936, lr_0 = 5.2183e-04
Loss = 4.0950e-01, PNorm = 55.9624, GNorm = 1.0658, lr_0 = 5.2147e-04
Loss = 4.0558e-01, PNorm = 55.9716, GNorm = 1.2060, lr_0 = 5.2112e-04
Loss = 4.3837e-01, PNorm = 55.9808, GNorm = 1.6951, lr_0 = 5.2076e-04
Loss = 3.9969e-01, PNorm = 55.9871, GNorm = 1.2379, lr_0 = 5.2040e-04
Loss = 4.2368e-01, PNorm = 55.9916, GNorm = 1.4512, lr_0 = 5.2005e-04
Loss = 4.4042e-01, PNorm = 56.0011, GNorm = 1.5020, lr_0 = 5.1969e-04
Loss = 3.6156e-01, PNorm = 56.0102, GNorm = 1.0683, lr_0 = 5.1933e-04
Loss = 3.8925e-01, PNorm = 56.0192, GNorm = 1.6221, lr_0 = 5.1898e-04
Loss = 3.9602e-01, PNorm = 56.0273, GNorm = 1.4021, lr_0 = 5.1862e-04
Loss = 4.6854e-01, PNorm = 56.0364, GNorm = 2.1199, lr_0 = 5.1827e-04
Loss = 4.2650e-01, PNorm = 56.0492, GNorm = 1.5354, lr_0 = 5.1791e-04
Validation mae = 0.115453
Epoch 10
Loss = 3.9964e-01, PNorm = 56.0598, GNorm = 1.4840, lr_0 = 5.1756e-04
Loss = 3.5568e-01, PNorm = 56.0719, GNorm = 1.0144, lr_0 = 5.1720e-04
Loss = 3.8974e-01, PNorm = 56.0812, GNorm = 1.1170, lr_0 = 5.1685e-04
Loss = 3.9698e-01, PNorm = 56.0891, GNorm = 1.2693, lr_0 = 5.1649e-04
Loss = 3.5114e-01, PNorm = 56.0964, GNorm = 1.1654, lr_0 = 5.1614e-04
Loss = 4.1752e-01, PNorm = 56.1043, GNorm = 1.5008, lr_0 = 5.1579e-04
Loss = 4.0993e-01, PNorm = 56.1107, GNorm = 1.7124, lr_0 = 5.1543e-04
Loss = 4.0705e-01, PNorm = 56.1178, GNorm = 1.2696, lr_0 = 5.1508e-04
Loss = 3.9934e-01, PNorm = 56.1221, GNorm = 1.2390, lr_0 = 5.1473e-04
Loss = 4.0234e-01, PNorm = 56.1334, GNorm = 0.9995, lr_0 = 5.1437e-04
Loss = 3.9656e-01, PNorm = 56.1428, GNorm = 1.2075, lr_0 = 5.1402e-04
Loss = 4.3968e-01, PNorm = 56.1529, GNorm = 1.1611, lr_0 = 5.1367e-04
Loss = 3.8848e-01, PNorm = 56.1550, GNorm = 1.2742, lr_0 = 5.1332e-04
Loss = 3.9060e-01, PNorm = 56.1639, GNorm = 1.4892, lr_0 = 5.1297e-04
Loss = 3.4383e-01, PNorm = 56.1707, GNorm = 1.1415, lr_0 = 5.1262e-04
Loss = 4.3652e-01, PNorm = 56.1810, GNorm = 1.5426, lr_0 = 5.1226e-04
Loss = 4.2206e-01, PNorm = 56.1841, GNorm = 1.3374, lr_0 = 5.1191e-04
Loss = 3.9066e-01, PNorm = 56.1887, GNorm = 1.5438, lr_0 = 5.1156e-04
Loss = 4.0288e-01, PNorm = 56.1979, GNorm = 1.6360, lr_0 = 5.1121e-04
Loss = 3.8645e-01, PNorm = 56.2032, GNorm = 1.8994, lr_0 = 5.1086e-04
Loss = 4.0545e-01, PNorm = 56.2135, GNorm = 1.3896, lr_0 = 5.1051e-04
Loss = 3.3291e-01, PNorm = 56.2258, GNorm = 1.2876, lr_0 = 5.1016e-04
Loss = 4.0031e-01, PNorm = 56.2302, GNorm = 0.9393, lr_0 = 5.0981e-04
Loss = 4.3063e-01, PNorm = 56.2379, GNorm = 1.7143, lr_0 = 5.0946e-04
Loss = 4.2214e-01, PNorm = 56.2469, GNorm = 1.1095, lr_0 = 5.0911e-04
Loss = 4.5274e-01, PNorm = 56.2559, GNorm = 1.3079, lr_0 = 5.0877e-04
Loss = 4.1515e-01, PNorm = 56.2666, GNorm = 1.3358, lr_0 = 5.0842e-04
Loss = 3.9979e-01, PNorm = 56.2791, GNorm = 1.3031, lr_0 = 5.0807e-04
Loss = 4.2756e-01, PNorm = 56.2892, GNorm = 1.2267, lr_0 = 5.0772e-04
Loss = 4.6250e-01, PNorm = 56.3034, GNorm = 1.8462, lr_0 = 5.0737e-04
Loss = 3.9319e-01, PNorm = 56.3123, GNorm = 2.5708, lr_0 = 5.0703e-04
Loss = 4.4940e-01, PNorm = 56.3213, GNorm = 1.7952, lr_0 = 5.0668e-04
Loss = 3.9679e-01, PNorm = 56.3318, GNorm = 1.2016, lr_0 = 5.0633e-04
Loss = 3.6004e-01, PNorm = 56.3413, GNorm = 1.2341, lr_0 = 5.0598e-04
Loss = 4.2641e-01, PNorm = 56.3481, GNorm = 1.4220, lr_0 = 5.0564e-04
Loss = 4.3468e-01, PNorm = 56.3635, GNorm = 1.1694, lr_0 = 5.0529e-04
Loss = 4.1150e-01, PNorm = 56.3745, GNorm = 1.2665, lr_0 = 5.0494e-04
Loss = 3.7755e-01, PNorm = 56.3819, GNorm = 0.9571, lr_0 = 5.0460e-04
Loss = 4.7665e-01, PNorm = 56.3927, GNorm = 1.7981, lr_0 = 5.0425e-04
Loss = 4.1065e-01, PNorm = 56.4061, GNorm = 1.1935, lr_0 = 5.0391e-04
Loss = 4.4276e-01, PNorm = 56.4182, GNorm = 1.2630, lr_0 = 5.0356e-04
Loss = 3.8459e-01, PNorm = 56.4302, GNorm = 1.6349, lr_0 = 5.0322e-04
Loss = 3.8353e-01, PNorm = 56.4427, GNorm = 1.3771, lr_0 = 5.0287e-04
Loss = 4.0964e-01, PNorm = 56.4524, GNorm = 1.3747, lr_0 = 5.0253e-04
Loss = 3.9788e-01, PNorm = 56.4647, GNorm = 3.1499, lr_0 = 5.0218e-04
Loss = 4.3179e-01, PNorm = 56.4716, GNorm = 1.9631, lr_0 = 5.0184e-04
Loss = 3.3499e-01, PNorm = 56.4816, GNorm = 1.2284, lr_0 = 5.0150e-04
Loss = 3.8278e-01, PNorm = 56.4878, GNorm = 1.7086, lr_0 = 5.0115e-04
Loss = 4.3107e-01, PNorm = 56.5030, GNorm = 1.3867, lr_0 = 5.0081e-04
Loss = 4.6778e-01, PNorm = 56.5109, GNorm = 1.2027, lr_0 = 5.0047e-04
Loss = 4.1796e-01, PNorm = 56.5204, GNorm = 1.2349, lr_0 = 5.0012e-04
Loss = 4.0287e-01, PNorm = 56.5304, GNorm = 1.4276, lr_0 = 4.9978e-04
Loss = 4.0766e-01, PNorm = 56.5413, GNorm = 1.7549, lr_0 = 4.9944e-04
Loss = 4.2224e-01, PNorm = 56.5550, GNorm = 1.6916, lr_0 = 4.9910e-04
Loss = 4.3280e-01, PNorm = 56.5652, GNorm = 2.0253, lr_0 = 4.9875e-04
Loss = 4.0438e-01, PNorm = 56.5792, GNorm = 1.5101, lr_0 = 4.9841e-04
Loss = 4.4412e-01, PNorm = 56.5879, GNorm = 1.3373, lr_0 = 4.9807e-04
Loss = 4.1763e-01, PNorm = 56.5929, GNorm = 1.3697, lr_0 = 4.9773e-04
Loss = 4.0664e-01, PNorm = 56.6009, GNorm = 1.5115, lr_0 = 4.9739e-04
Loss = 4.5827e-01, PNorm = 56.6150, GNorm = 1.7246, lr_0 = 4.9705e-04
Loss = 4.0032e-01, PNorm = 56.6231, GNorm = 1.6002, lr_0 = 4.9671e-04
Loss = 4.2186e-01, PNorm = 56.6339, GNorm = 0.9979, lr_0 = 4.9637e-04
Loss = 3.9952e-01, PNorm = 56.6503, GNorm = 1.0336, lr_0 = 4.9603e-04
Loss = 3.9393e-01, PNorm = 56.6594, GNorm = 1.4893, lr_0 = 4.9569e-04
Loss = 4.4356e-01, PNorm = 56.6735, GNorm = 1.5196, lr_0 = 4.9535e-04
Loss = 4.4153e-01, PNorm = 56.6833, GNorm = 1.5364, lr_0 = 4.9501e-04
Loss = 4.1859e-01, PNorm = 56.6885, GNorm = 1.8743, lr_0 = 4.9467e-04
Loss = 4.2218e-01, PNorm = 56.6968, GNorm = 2.5373, lr_0 = 4.9433e-04
Loss = 4.0555e-01, PNorm = 56.7019, GNorm = 1.0285, lr_0 = 4.9399e-04
Loss = 3.6912e-01, PNorm = 56.7090, GNorm = 1.2968, lr_0 = 4.9365e-04
Loss = 4.1763e-01, PNorm = 56.7150, GNorm = 1.5877, lr_0 = 4.9332e-04
Loss = 3.6255e-01, PNorm = 56.7201, GNorm = 1.7553, lr_0 = 4.9298e-04
Loss = 4.0572e-01, PNorm = 56.7256, GNorm = 1.3417, lr_0 = 4.9264e-04
Loss = 4.0418e-01, PNorm = 56.7360, GNorm = 1.2256, lr_0 = 4.9230e-04
Loss = 4.2211e-01, PNorm = 56.7400, GNorm = 1.2793, lr_0 = 4.9197e-04
Loss = 4.6461e-01, PNorm = 56.7494, GNorm = 1.8128, lr_0 = 4.9163e-04
Loss = 4.2818e-01, PNorm = 56.7539, GNorm = 1.3886, lr_0 = 4.9129e-04
Loss = 4.1685e-01, PNorm = 56.7655, GNorm = 1.0673, lr_0 = 4.9095e-04
Loss = 4.7236e-01, PNorm = 56.7798, GNorm = 1.4261, lr_0 = 4.9062e-04
Loss = 4.3251e-01, PNorm = 56.7845, GNorm = 2.0148, lr_0 = 4.9028e-04
Loss = 3.9202e-01, PNorm = 56.7911, GNorm = 1.3802, lr_0 = 4.8995e-04
Loss = 3.9327e-01, PNorm = 56.7962, GNorm = 1.0904, lr_0 = 4.8961e-04
Loss = 4.2779e-01, PNorm = 56.8054, GNorm = 1.2699, lr_0 = 4.8928e-04
Loss = 4.4042e-01, PNorm = 56.8153, GNorm = 1.1862, lr_0 = 4.8894e-04
Loss = 4.2200e-01, PNorm = 56.8284, GNorm = 1.5653, lr_0 = 4.8861e-04
Loss = 4.3124e-01, PNorm = 56.8362, GNorm = 1.1066, lr_0 = 4.8827e-04
Loss = 4.0645e-01, PNorm = 56.8463, GNorm = 1.2706, lr_0 = 4.8794e-04
Loss = 4.0609e-01, PNorm = 56.8523, GNorm = 0.9164, lr_0 = 4.8760e-04
Loss = 4.1100e-01, PNorm = 56.8604, GNorm = 1.2643, lr_0 = 4.8727e-04
Loss = 3.8907e-01, PNorm = 56.8686, GNorm = 0.9126, lr_0 = 4.8693e-04
Loss = 4.1017e-01, PNorm = 56.8761, GNorm = 1.5053, lr_0 = 4.8660e-04
Loss = 4.0591e-01, PNorm = 56.8814, GNorm = 1.2331, lr_0 = 4.8627e-04
Loss = 4.2134e-01, PNorm = 56.8835, GNorm = 1.6073, lr_0 = 4.8593e-04
Loss = 4.4092e-01, PNorm = 56.8914, GNorm = 1.3141, lr_0 = 4.8560e-04
Loss = 4.3932e-01, PNorm = 56.9033, GNorm = 1.7030, lr_0 = 4.8527e-04
Loss = 4.1630e-01, PNorm = 56.9170, GNorm = 1.6017, lr_0 = 4.8494e-04
Loss = 4.6584e-01, PNorm = 56.9247, GNorm = 1.1829, lr_0 = 4.8460e-04
Loss = 4.4637e-01, PNorm = 56.9362, GNorm = 1.3889, lr_0 = 4.8427e-04
Loss = 4.2649e-01, PNorm = 56.9420, GNorm = 0.9847, lr_0 = 4.8394e-04
Loss = 4.2336e-01, PNorm = 56.9492, GNorm = 1.0411, lr_0 = 4.8361e-04
Loss = 4.1517e-01, PNorm = 56.9505, GNorm = 1.1766, lr_0 = 4.8328e-04
Loss = 4.4576e-01, PNorm = 56.9553, GNorm = 1.7414, lr_0 = 4.8295e-04
Loss = 3.9469e-01, PNorm = 56.9670, GNorm = 1.4450, lr_0 = 4.8262e-04
Loss = 3.9679e-01, PNorm = 56.9706, GNorm = 1.8421, lr_0 = 4.8228e-04
Loss = 5.0287e-01, PNorm = 56.9766, GNorm = 1.1558, lr_0 = 4.8195e-04
Loss = 4.1620e-01, PNorm = 56.9822, GNorm = 2.1835, lr_0 = 4.8162e-04
Loss = 4.2193e-01, PNorm = 56.9890, GNorm = 1.5705, lr_0 = 4.8129e-04
Loss = 4.3615e-01, PNorm = 56.9915, GNorm = 1.6582, lr_0 = 4.8096e-04
Loss = 3.9853e-01, PNorm = 56.9946, GNorm = 1.5953, lr_0 = 4.8064e-04
Loss = 3.8466e-01, PNorm = 56.9998, GNorm = 0.9994, lr_0 = 4.8031e-04
Loss = 4.8217e-01, PNorm = 57.0036, GNorm = 1.3966, lr_0 = 4.7998e-04
Loss = 4.4214e-01, PNorm = 57.0106, GNorm = 1.2441, lr_0 = 4.7965e-04
Loss = 3.5435e-01, PNorm = 57.0184, GNorm = 1.2337, lr_0 = 4.7932e-04
Loss = 4.1072e-01, PNorm = 57.0225, GNorm = 1.6037, lr_0 = 4.7899e-04
Loss = 4.6746e-01, PNorm = 57.0380, GNorm = 2.1964, lr_0 = 4.7866e-04
Loss = 4.6292e-01, PNorm = 57.0475, GNorm = 1.3911, lr_0 = 4.7833e-04
Loss = 3.8137e-01, PNorm = 57.0585, GNorm = 1.3869, lr_0 = 4.7801e-04
Loss = 3.4973e-01, PNorm = 57.0664, GNorm = 1.5675, lr_0 = 4.7768e-04
Loss = 4.2518e-01, PNorm = 57.0728, GNorm = 1.4944, lr_0 = 4.7735e-04
Loss = 4.0110e-01, PNorm = 57.0809, GNorm = 1.0372, lr_0 = 4.7703e-04
Validation mae = 0.114765
Epoch 11
Loss = 4.3749e-01, PNorm = 57.0859, GNorm = 1.4407, lr_0 = 4.7670e-04
Loss = 4.5832e-01, PNorm = 57.0955, GNorm = 1.4939, lr_0 = 4.7637e-04
Loss = 3.8161e-01, PNorm = 57.1038, GNorm = 1.1732, lr_0 = 4.7605e-04
Loss = 3.5441e-01, PNorm = 57.1133, GNorm = 1.3806, lr_0 = 4.7572e-04
Loss = 4.0066e-01, PNorm = 57.1207, GNorm = 1.9304, lr_0 = 4.7539e-04
Loss = 3.6251e-01, PNorm = 57.1309, GNorm = 1.1067, lr_0 = 4.7507e-04
Loss = 4.1290e-01, PNorm = 57.1356, GNorm = 1.3747, lr_0 = 4.7474e-04
Loss = 3.8144e-01, PNorm = 57.1468, GNorm = 1.4008, lr_0 = 4.7442e-04
Loss = 4.0172e-01, PNorm = 57.1552, GNorm = 1.2545, lr_0 = 4.7409e-04
Loss = 3.7227e-01, PNorm = 57.1660, GNorm = 2.1305, lr_0 = 4.7377e-04
Loss = 4.2394e-01, PNorm = 57.1736, GNorm = 0.8988, lr_0 = 4.7344e-04
Loss = 4.1225e-01, PNorm = 57.1851, GNorm = 1.2932, lr_0 = 4.7312e-04
Loss = 3.6868e-01, PNorm = 57.1921, GNorm = 1.8497, lr_0 = 4.7279e-04
Loss = 4.5698e-01, PNorm = 57.2026, GNorm = 1.5534, lr_0 = 4.7247e-04
Loss = 4.1153e-01, PNorm = 57.2108, GNorm = 1.7416, lr_0 = 4.7215e-04
Loss = 3.8077e-01, PNorm = 57.2208, GNorm = 1.1448, lr_0 = 4.7182e-04
Loss = 3.6631e-01, PNorm = 57.2288, GNorm = 1.3049, lr_0 = 4.7150e-04
Loss = 3.9336e-01, PNorm = 57.2339, GNorm = 1.8294, lr_0 = 4.7118e-04
Loss = 4.3332e-01, PNorm = 57.2388, GNorm = 1.7970, lr_0 = 4.7085e-04
Loss = 4.4569e-01, PNorm = 57.2503, GNorm = 1.7144, lr_0 = 4.7053e-04
Loss = 4.2631e-01, PNorm = 57.2597, GNorm = 1.9536, lr_0 = 4.7021e-04
Loss = 4.0089e-01, PNorm = 57.2672, GNorm = 1.5497, lr_0 = 4.6989e-04
Loss = 3.9986e-01, PNorm = 57.2734, GNorm = 1.1171, lr_0 = 4.6957e-04
Loss = 3.7161e-01, PNorm = 57.2848, GNorm = 1.5835, lr_0 = 4.6924e-04
Loss = 4.1721e-01, PNorm = 57.2958, GNorm = 1.4719, lr_0 = 4.6892e-04
Loss = 3.7180e-01, PNorm = 57.3044, GNorm = 1.0793, lr_0 = 4.6860e-04
Loss = 4.3743e-01, PNorm = 57.3061, GNorm = 1.0929, lr_0 = 4.6828e-04
Loss = 3.5759e-01, PNorm = 57.3119, GNorm = 1.0744, lr_0 = 4.6796e-04
Loss = 4.1079e-01, PNorm = 57.3176, GNorm = 1.9288, lr_0 = 4.6764e-04
Loss = 4.4759e-01, PNorm = 57.3274, GNorm = 1.2539, lr_0 = 4.6732e-04
Loss = 3.6695e-01, PNorm = 57.3349, GNorm = 1.4134, lr_0 = 4.6700e-04
Loss = 3.4532e-01, PNorm = 57.3394, GNorm = 1.1564, lr_0 = 4.6668e-04
Loss = 3.9633e-01, PNorm = 57.3473, GNorm = 1.9239, lr_0 = 4.6636e-04
Loss = 3.9705e-01, PNorm = 57.3600, GNorm = 1.0793, lr_0 = 4.6604e-04
Loss = 3.8805e-01, PNorm = 57.3709, GNorm = 1.0892, lr_0 = 4.6572e-04
Loss = 4.0789e-01, PNorm = 57.3784, GNorm = 1.8556, lr_0 = 4.6540e-04
Loss = 4.3415e-01, PNorm = 57.3847, GNorm = 1.2592, lr_0 = 4.6508e-04
Loss = 3.9936e-01, PNorm = 57.3930, GNorm = 1.1378, lr_0 = 4.6476e-04
Loss = 4.6099e-01, PNorm = 57.3991, GNorm = 1.1458, lr_0 = 4.6445e-04
Loss = 4.0257e-01, PNorm = 57.4063, GNorm = 1.6887, lr_0 = 4.6413e-04
Loss = 3.5206e-01, PNorm = 57.4113, GNorm = 0.7263, lr_0 = 4.6381e-04
Loss = 4.2324e-01, PNorm = 57.4221, GNorm = 1.9721, lr_0 = 4.6349e-04
Loss = 3.3609e-01, PNorm = 57.4307, GNorm = 1.2368, lr_0 = 4.6317e-04
Loss = 4.0349e-01, PNorm = 57.4374, GNorm = 1.2966, lr_0 = 4.6286e-04
Loss = 4.4010e-01, PNorm = 57.4446, GNorm = 1.1986, lr_0 = 4.6254e-04
Loss = 4.0170e-01, PNorm = 57.4545, GNorm = 1.4580, lr_0 = 4.6222e-04
Loss = 3.8397e-01, PNorm = 57.4618, GNorm = 1.2152, lr_0 = 4.6191e-04
Loss = 4.2533e-01, PNorm = 57.4714, GNorm = 1.2979, lr_0 = 4.6159e-04
Loss = 4.6188e-01, PNorm = 57.4802, GNorm = 1.4434, lr_0 = 4.6127e-04
Loss = 3.7679e-01, PNorm = 57.4919, GNorm = 1.2435, lr_0 = 4.6096e-04
Loss = 4.2651e-01, PNorm = 57.4977, GNorm = 1.1640, lr_0 = 4.6064e-04
Loss = 4.2969e-01, PNorm = 57.5098, GNorm = 1.9528, lr_0 = 4.6033e-04
Loss = 4.4257e-01, PNorm = 57.5172, GNorm = 1.6375, lr_0 = 4.6001e-04
Loss = 3.8946e-01, PNorm = 57.5221, GNorm = 1.7163, lr_0 = 4.5970e-04
Loss = 4.4080e-01, PNorm = 57.5288, GNorm = 1.1871, lr_0 = 4.5938e-04
Loss = 4.1432e-01, PNorm = 57.5382, GNorm = 1.1034, lr_0 = 4.5907e-04
Loss = 3.7509e-01, PNorm = 57.5483, GNorm = 1.2908, lr_0 = 4.5875e-04
Loss = 4.0375e-01, PNorm = 57.5543, GNorm = 1.4024, lr_0 = 4.5844e-04
Loss = 4.0179e-01, PNorm = 57.5622, GNorm = 1.3187, lr_0 = 4.5812e-04
Loss = 4.1496e-01, PNorm = 57.5700, GNorm = 1.5518, lr_0 = 4.5781e-04
Loss = 3.8655e-01, PNorm = 57.5777, GNorm = 1.0932, lr_0 = 4.5750e-04
Loss = 3.9424e-01, PNorm = 57.5850, GNorm = 1.2278, lr_0 = 4.5718e-04
Loss = 3.9733e-01, PNorm = 57.5888, GNorm = 1.0208, lr_0 = 4.5687e-04
Loss = 3.9804e-01, PNorm = 57.5927, GNorm = 1.5662, lr_0 = 4.5656e-04
Loss = 4.1685e-01, PNorm = 57.6055, GNorm = 1.5020, lr_0 = 4.5624e-04
Loss = 3.7667e-01, PNorm = 57.6165, GNorm = 1.4020, lr_0 = 4.5593e-04
Loss = 4.3877e-01, PNorm = 57.6253, GNorm = 2.0547, lr_0 = 4.5562e-04
Loss = 4.2443e-01, PNorm = 57.6263, GNorm = 1.3247, lr_0 = 4.5531e-04
Loss = 4.0903e-01, PNorm = 57.6324, GNorm = 1.3781, lr_0 = 4.5499e-04
Loss = 3.4138e-01, PNorm = 57.6416, GNorm = 1.5017, lr_0 = 4.5468e-04
Loss = 3.6423e-01, PNorm = 57.6508, GNorm = 1.3499, lr_0 = 4.5437e-04
Loss = 3.5716e-01, PNorm = 57.6563, GNorm = 1.4565, lr_0 = 4.5406e-04
Loss = 4.5684e-01, PNorm = 57.6671, GNorm = 1.9560, lr_0 = 4.5375e-04
Loss = 4.3652e-01, PNorm = 57.6789, GNorm = 1.1585, lr_0 = 4.5344e-04
Loss = 4.1223e-01, PNorm = 57.6843, GNorm = 0.9808, lr_0 = 4.5313e-04
Loss = 4.0719e-01, PNorm = 57.6951, GNorm = 1.2138, lr_0 = 4.5282e-04
Loss = 4.5310e-01, PNorm = 57.7017, GNorm = 1.6302, lr_0 = 4.5251e-04
Loss = 4.0728e-01, PNorm = 57.7093, GNorm = 1.5032, lr_0 = 4.5220e-04
Loss = 4.4717e-01, PNorm = 57.7183, GNorm = 1.7720, lr_0 = 4.5189e-04
Loss = 3.7120e-01, PNorm = 57.7231, GNorm = 1.3489, lr_0 = 4.5158e-04
Loss = 3.9324e-01, PNorm = 57.7289, GNorm = 2.2302, lr_0 = 4.5127e-04
Loss = 3.8903e-01, PNorm = 57.7320, GNorm = 1.5865, lr_0 = 4.5096e-04
Loss = 4.7293e-01, PNorm = 57.7429, GNorm = 1.5095, lr_0 = 4.5065e-04
Loss = 4.0542e-01, PNorm = 57.7516, GNorm = 1.8612, lr_0 = 4.5034e-04
Loss = 4.7456e-01, PNorm = 57.7589, GNorm = 2.4381, lr_0 = 4.5003e-04
Loss = 3.9743e-01, PNorm = 57.7667, GNorm = 1.3284, lr_0 = 4.4972e-04
Loss = 3.7789e-01, PNorm = 57.7754, GNorm = 1.5431, lr_0 = 4.4942e-04
Loss = 4.1467e-01, PNorm = 57.7778, GNorm = 1.2930, lr_0 = 4.4911e-04
Loss = 4.0389e-01, PNorm = 57.7887, GNorm = 1.7423, lr_0 = 4.4880e-04
Loss = 3.4831e-01, PNorm = 57.7974, GNorm = 1.1871, lr_0 = 4.4849e-04
Loss = 3.3840e-01, PNorm = 57.8060, GNorm = 1.3689, lr_0 = 4.4819e-04
Loss = 4.1689e-01, PNorm = 57.8126, GNorm = 1.4496, lr_0 = 4.4788e-04
Loss = 4.1308e-01, PNorm = 57.8245, GNorm = 1.4538, lr_0 = 4.4757e-04
Loss = 4.2802e-01, PNorm = 57.8288, GNorm = 1.2626, lr_0 = 4.4727e-04
Loss = 4.1962e-01, PNorm = 57.8334, GNorm = 1.5176, lr_0 = 4.4696e-04
Loss = 5.1146e-01, PNorm = 57.8321, GNorm = 1.5702, lr_0 = 4.4665e-04
Loss = 3.9761e-01, PNorm = 57.8390, GNorm = 1.0684, lr_0 = 4.4635e-04
Loss = 4.2351e-01, PNorm = 57.8439, GNorm = 1.8142, lr_0 = 4.4604e-04
Loss = 4.1744e-01, PNorm = 57.8536, GNorm = 1.0878, lr_0 = 4.4574e-04
Loss = 3.9417e-01, PNorm = 57.8632, GNorm = 1.0376, lr_0 = 4.4543e-04
Loss = 3.9916e-01, PNorm = 57.8659, GNorm = 1.2789, lr_0 = 4.4513e-04
Loss = 3.7159e-01, PNorm = 57.8738, GNorm = 1.2378, lr_0 = 4.4482e-04
Loss = 4.2593e-01, PNorm = 57.8817, GNorm = 2.6167, lr_0 = 4.4452e-04
Loss = 4.0678e-01, PNorm = 57.8879, GNorm = 1.2980, lr_0 = 4.4421e-04
Loss = 4.1414e-01, PNorm = 57.8879, GNorm = 1.2594, lr_0 = 4.4391e-04
Loss = 3.9126e-01, PNorm = 57.8961, GNorm = 1.4787, lr_0 = 4.4360e-04
Loss = 4.4023e-01, PNorm = 57.9060, GNorm = 1.6865, lr_0 = 4.4330e-04
Loss = 4.8173e-01, PNorm = 57.9148, GNorm = 2.2314, lr_0 = 4.4299e-04
Loss = 3.4620e-01, PNorm = 57.9183, GNorm = 1.1203, lr_0 = 4.4269e-04
Loss = 3.8329e-01, PNorm = 57.9213, GNorm = 1.2445, lr_0 = 4.4239e-04
Loss = 4.3452e-01, PNorm = 57.9270, GNorm = 1.7315, lr_0 = 4.4209e-04
Loss = 4.1502e-01, PNorm = 57.9349, GNorm = 1.5222, lr_0 = 4.4178e-04
Loss = 4.3057e-01, PNorm = 57.9431, GNorm = 1.2296, lr_0 = 4.4148e-04
Loss = 3.6885e-01, PNorm = 57.9494, GNorm = 1.0005, lr_0 = 4.4118e-04
Loss = 4.2908e-01, PNorm = 57.9571, GNorm = 1.3069, lr_0 = 4.4088e-04
Loss = 4.2443e-01, PNorm = 57.9660, GNorm = 1.4341, lr_0 = 4.4057e-04
Loss = 4.5556e-01, PNorm = 57.9680, GNorm = 1.5101, lr_0 = 4.4027e-04
Loss = 5.0596e-01, PNorm = 57.9789, GNorm = 1.3745, lr_0 = 4.3997e-04
Loss = 4.5698e-01, PNorm = 57.9804, GNorm = 1.3730, lr_0 = 4.3967e-04
Loss = 3.9637e-01, PNorm = 57.9876, GNorm = 1.3286, lr_0 = 4.3937e-04
Validation mae = 0.115079
Epoch 12
Loss = 3.7137e-01, PNorm = 57.9978, GNorm = 1.6954, lr_0 = 4.3907e-04
Loss = 4.5836e-01, PNorm = 58.0077, GNorm = 1.2924, lr_0 = 4.3877e-04
Loss = 4.2604e-01, PNorm = 58.0102, GNorm = 1.1185, lr_0 = 4.3846e-04
Loss = 4.4015e-01, PNorm = 58.0136, GNorm = 1.5618, lr_0 = 4.3816e-04
Loss = 3.8542e-01, PNorm = 58.0187, GNorm = 1.1076, lr_0 = 4.3786e-04
Loss = 3.4524e-01, PNorm = 58.0258, GNorm = 1.4105, lr_0 = 4.3756e-04
Loss = 3.9265e-01, PNorm = 58.0364, GNorm = 1.2964, lr_0 = 4.3726e-04
Loss = 4.8429e-01, PNorm = 58.0427, GNorm = 0.9850, lr_0 = 4.3696e-04
Loss = 3.8580e-01, PNorm = 58.0495, GNorm = 1.4156, lr_0 = 4.3667e-04
Loss = 3.4759e-01, PNorm = 58.0540, GNorm = 1.6577, lr_0 = 4.3637e-04
Loss = 3.7295e-01, PNorm = 58.0596, GNorm = 1.8241, lr_0 = 4.3607e-04
Loss = 3.9595e-01, PNorm = 58.0710, GNorm = 1.0905, lr_0 = 4.3577e-04
Loss = 3.5040e-01, PNorm = 58.0750, GNorm = 1.0160, lr_0 = 4.3547e-04
Loss = 4.1623e-01, PNorm = 58.0810, GNorm = 2.1073, lr_0 = 4.3517e-04
Loss = 3.6234e-01, PNorm = 58.0916, GNorm = 1.0750, lr_0 = 4.3487e-04
Loss = 3.6624e-01, PNorm = 58.0990, GNorm = 1.2839, lr_0 = 4.3458e-04
Loss = 3.9795e-01, PNorm = 58.1050, GNorm = 2.2235, lr_0 = 4.3428e-04
Loss = 3.4169e-01, PNorm = 58.1118, GNorm = 1.3072, lr_0 = 4.3398e-04
Loss = 3.6468e-01, PNorm = 58.1172, GNorm = 1.1543, lr_0 = 4.3368e-04
Loss = 3.5727e-01, PNorm = 58.1237, GNorm = 1.5998, lr_0 = 4.3339e-04
Loss = 3.5028e-01, PNorm = 58.1293, GNorm = 1.3297, lr_0 = 4.3309e-04
Loss = 3.8257e-01, PNorm = 58.1402, GNorm = 1.5182, lr_0 = 4.3279e-04
Loss = 3.9202e-01, PNorm = 58.1492, GNorm = 1.0805, lr_0 = 4.3250e-04
Loss = 3.6894e-01, PNorm = 58.1569, GNorm = 0.6443, lr_0 = 4.3220e-04
Loss = 3.4059e-01, PNorm = 58.1625, GNorm = 1.3738, lr_0 = 4.3190e-04
Loss = 3.8032e-01, PNorm = 58.1686, GNorm = 1.3568, lr_0 = 4.3161e-04
Loss = 3.8055e-01, PNorm = 58.1755, GNorm = 1.1699, lr_0 = 4.3131e-04
Loss = 3.6131e-01, PNorm = 58.1830, GNorm = 1.2671, lr_0 = 4.3102e-04
Loss = 3.8448e-01, PNorm = 58.1922, GNorm = 0.9567, lr_0 = 4.3072e-04
Loss = 3.1862e-01, PNorm = 58.2009, GNorm = 1.0221, lr_0 = 4.3043e-04
Loss = 4.1399e-01, PNorm = 58.2058, GNorm = 1.2472, lr_0 = 4.3013e-04
Loss = 4.0509e-01, PNorm = 58.2145, GNorm = 1.4086, lr_0 = 4.2984e-04
Loss = 4.0836e-01, PNorm = 58.2212, GNorm = 1.5590, lr_0 = 4.2954e-04
Loss = 3.8789e-01, PNorm = 58.2277, GNorm = 1.7453, lr_0 = 4.2925e-04
Loss = 4.1101e-01, PNorm = 58.2368, GNorm = 1.4095, lr_0 = 4.2895e-04
Loss = 3.7246e-01, PNorm = 58.2433, GNorm = 1.5880, lr_0 = 4.2866e-04
Loss = 4.0099e-01, PNorm = 58.2509, GNorm = 1.3929, lr_0 = 4.2837e-04
Loss = 3.9391e-01, PNorm = 58.2630, GNorm = 2.6377, lr_0 = 4.2807e-04
Loss = 4.0192e-01, PNorm = 58.2659, GNorm = 1.3869, lr_0 = 4.2778e-04
Loss = 4.1269e-01, PNorm = 58.2768, GNorm = 1.5193, lr_0 = 4.2749e-04
Loss = 4.2233e-01, PNorm = 58.2863, GNorm = 1.5862, lr_0 = 4.2719e-04
Loss = 3.7832e-01, PNorm = 58.2891, GNorm = 1.8992, lr_0 = 4.2690e-04
Loss = 3.7063e-01, PNorm = 58.2928, GNorm = 1.0358, lr_0 = 4.2661e-04
Loss = 4.2464e-01, PNorm = 58.3000, GNorm = 1.8923, lr_0 = 4.2632e-04
Loss = 3.8459e-01, PNorm = 58.3049, GNorm = 1.5467, lr_0 = 4.2602e-04
Loss = 4.2887e-01, PNorm = 58.3137, GNorm = 1.4439, lr_0 = 4.2573e-04
Loss = 4.4010e-01, PNorm = 58.3284, GNorm = 1.5792, lr_0 = 4.2544e-04
Loss = 3.8781e-01, PNorm = 58.3358, GNorm = 1.2765, lr_0 = 4.2515e-04
Loss = 4.2119e-01, PNorm = 58.3414, GNorm = 0.9948, lr_0 = 4.2486e-04
Loss = 3.9041e-01, PNorm = 58.3485, GNorm = 1.3588, lr_0 = 4.2457e-04
Loss = 3.7685e-01, PNorm = 58.3555, GNorm = 1.5247, lr_0 = 4.2428e-04
Loss = 3.9516e-01, PNorm = 58.3599, GNorm = 1.9848, lr_0 = 4.2399e-04
Loss = 4.0713e-01, PNorm = 58.3628, GNorm = 1.4505, lr_0 = 4.2370e-04
Loss = 3.9700e-01, PNorm = 58.3719, GNorm = 1.3280, lr_0 = 4.2340e-04
Loss = 4.3938e-01, PNorm = 58.3808, GNorm = 1.7851, lr_0 = 4.2311e-04
Loss = 4.6612e-01, PNorm = 58.3884, GNorm = 1.4270, lr_0 = 4.2283e-04
Loss = 4.1912e-01, PNorm = 58.3897, GNorm = 2.0038, lr_0 = 4.2254e-04
Loss = 4.1257e-01, PNorm = 58.3984, GNorm = 1.7093, lr_0 = 4.2225e-04
Loss = 3.8406e-01, PNorm = 58.4089, GNorm = 1.5393, lr_0 = 4.2196e-04
Loss = 3.7608e-01, PNorm = 58.4211, GNorm = 1.4159, lr_0 = 4.2167e-04
Loss = 4.0222e-01, PNorm = 58.4268, GNorm = 1.2116, lr_0 = 4.2138e-04
Loss = 3.8223e-01, PNorm = 58.4338, GNorm = 1.5846, lr_0 = 4.2109e-04
Loss = 4.3222e-01, PNorm = 58.4453, GNorm = 1.3498, lr_0 = 4.2080e-04
Loss = 3.7518e-01, PNorm = 58.4540, GNorm = 1.2867, lr_0 = 4.2051e-04
Loss = 3.8094e-01, PNorm = 58.4630, GNorm = 1.1472, lr_0 = 4.2023e-04
Loss = 3.8951e-01, PNorm = 58.4730, GNorm = 1.2054, lr_0 = 4.1994e-04
Loss = 3.4305e-01, PNorm = 58.4763, GNorm = 1.0556, lr_0 = 4.1965e-04
Loss = 4.1280e-01, PNorm = 58.4780, GNorm = 1.5176, lr_0 = 4.1936e-04
Loss = 3.8785e-01, PNorm = 58.4849, GNorm = 1.1312, lr_0 = 4.1907e-04
Loss = 3.8638e-01, PNorm = 58.4933, GNorm = 1.2552, lr_0 = 4.1879e-04
Loss = 4.3604e-01, PNorm = 58.4942, GNorm = 1.1871, lr_0 = 4.1850e-04
Loss = 3.8708e-01, PNorm = 58.5000, GNorm = 1.8613, lr_0 = 4.1821e-04
Loss = 3.7999e-01, PNorm = 58.5056, GNorm = 1.3232, lr_0 = 4.1793e-04
Loss = 3.4542e-01, PNorm = 58.5093, GNorm = 1.8303, lr_0 = 4.1764e-04
Loss = 3.7651e-01, PNorm = 58.5172, GNorm = 1.1582, lr_0 = 4.1736e-04
Loss = 3.7486e-01, PNorm = 58.5235, GNorm = 1.5837, lr_0 = 4.1707e-04
Loss = 4.7343e-01, PNorm = 58.5318, GNorm = 1.0864, lr_0 = 4.1678e-04
Loss = 3.7263e-01, PNorm = 58.5368, GNorm = 1.2153, lr_0 = 4.1650e-04
Loss = 4.8073e-01, PNorm = 58.5423, GNorm = 1.3223, lr_0 = 4.1621e-04
Loss = 4.1544e-01, PNorm = 58.5502, GNorm = 1.3143, lr_0 = 4.1593e-04
Loss = 4.2414e-01, PNorm = 58.5558, GNorm = 1.5491, lr_0 = 4.1564e-04
Loss = 4.2520e-01, PNorm = 58.5576, GNorm = 1.3041, lr_0 = 4.1536e-04
Loss = 4.1287e-01, PNorm = 58.5631, GNorm = 1.3569, lr_0 = 4.1507e-04
Loss = 3.8503e-01, PNorm = 58.5689, GNorm = 0.9758, lr_0 = 4.1479e-04
Loss = 3.5406e-01, PNorm = 58.5696, GNorm = 1.0362, lr_0 = 4.1450e-04
Loss = 4.0148e-01, PNorm = 58.5777, GNorm = 1.2063, lr_0 = 4.1422e-04
Loss = 3.9819e-01, PNorm = 58.5864, GNorm = 1.4467, lr_0 = 4.1394e-04
Loss = 3.9198e-01, PNorm = 58.5936, GNorm = 1.2485, lr_0 = 4.1365e-04
Loss = 4.1044e-01, PNorm = 58.5941, GNorm = 1.3135, lr_0 = 4.1337e-04
Loss = 4.1147e-01, PNorm = 58.6008, GNorm = 1.0786, lr_0 = 4.1309e-04
Loss = 4.1936e-01, PNorm = 58.6044, GNorm = 1.5544, lr_0 = 4.1280e-04
Loss = 4.2349e-01, PNorm = 58.6124, GNorm = 1.9617, lr_0 = 4.1252e-04
Loss = 4.0216e-01, PNorm = 58.6181, GNorm = 1.2135, lr_0 = 4.1224e-04
Loss = 3.5744e-01, PNorm = 58.6270, GNorm = 1.1468, lr_0 = 4.1196e-04
Loss = 4.8300e-01, PNorm = 58.6350, GNorm = 1.1291, lr_0 = 4.1167e-04
Loss = 4.0628e-01, PNorm = 58.6397, GNorm = 1.3157, lr_0 = 4.1139e-04
Loss = 4.4803e-01, PNorm = 58.6509, GNorm = 1.9475, lr_0 = 4.1111e-04
Loss = 4.1007e-01, PNorm = 58.6595, GNorm = 1.5886, lr_0 = 4.1083e-04
Loss = 4.7502e-01, PNorm = 58.6692, GNorm = 1.1052, lr_0 = 4.1055e-04
Loss = 4.3604e-01, PNorm = 58.6726, GNorm = 1.5093, lr_0 = 4.1027e-04
Loss = 4.5265e-01, PNorm = 58.6839, GNorm = 1.5829, lr_0 = 4.0998e-04
Loss = 3.7679e-01, PNorm = 58.6895, GNorm = 2.5373, lr_0 = 4.0970e-04
Loss = 4.2980e-01, PNorm = 58.6934, GNorm = 0.9373, lr_0 = 4.0942e-04
Loss = 3.6019e-01, PNorm = 58.6970, GNorm = 1.0148, lr_0 = 4.0914e-04
Loss = 4.0485e-01, PNorm = 58.7040, GNorm = 1.6377, lr_0 = 4.0886e-04
Loss = 4.8803e-01, PNorm = 58.7117, GNorm = 1.1955, lr_0 = 4.0858e-04
Loss = 4.0045e-01, PNorm = 58.7176, GNorm = 1.0994, lr_0 = 4.0830e-04
Loss = 3.8589e-01, PNorm = 58.7225, GNorm = 1.3784, lr_0 = 4.0802e-04
Loss = 4.0502e-01, PNorm = 58.7266, GNorm = 1.4135, lr_0 = 4.0774e-04
Loss = 3.9489e-01, PNorm = 58.7323, GNorm = 1.4359, lr_0 = 4.0746e-04
Loss = 4.3129e-01, PNorm = 58.7398, GNorm = 1.0762, lr_0 = 4.0718e-04
Loss = 4.7501e-01, PNorm = 58.7446, GNorm = 1.0269, lr_0 = 4.0691e-04
Loss = 5.1944e-01, PNorm = 58.7515, GNorm = 1.3291, lr_0 = 4.0663e-04
Loss = 3.9821e-01, PNorm = 58.7595, GNorm = 0.8214, lr_0 = 4.0635e-04
Loss = 3.7084e-01, PNorm = 58.7653, GNorm = 1.7763, lr_0 = 4.0607e-04
Loss = 4.5187e-01, PNorm = 58.7742, GNorm = 1.4110, lr_0 = 4.0579e-04
Loss = 4.2347e-01, PNorm = 58.7768, GNorm = 1.0729, lr_0 = 4.0551e-04
Loss = 3.7047e-01, PNorm = 58.7845, GNorm = 1.0774, lr_0 = 4.0524e-04
Loss = 4.1602e-01, PNorm = 58.7873, GNorm = 1.6538, lr_0 = 4.0496e-04
Loss = 3.8363e-01, PNorm = 58.7956, GNorm = 0.9707, lr_0 = 4.0468e-04
Validation mae = 0.114112
Epoch 13
Loss = 4.3379e-01, PNorm = 58.7964, GNorm = 1.1634, lr_0 = 4.0440e-04
Loss = 4.0312e-01, PNorm = 58.8007, GNorm = 1.6876, lr_0 = 4.0413e-04
Loss = 3.8401e-01, PNorm = 58.8120, GNorm = 1.2840, lr_0 = 4.0385e-04
Loss = 4.6273e-01, PNorm = 58.8203, GNorm = 1.4725, lr_0 = 4.0357e-04
Loss = 3.8224e-01, PNorm = 58.8273, GNorm = 1.1126, lr_0 = 4.0330e-04
Loss = 3.5487e-01, PNorm = 58.8342, GNorm = 1.8250, lr_0 = 4.0302e-04
Loss = 3.3902e-01, PNorm = 58.8442, GNorm = 1.3068, lr_0 = 4.0274e-04
Loss = 4.1697e-01, PNorm = 58.8507, GNorm = 1.7689, lr_0 = 4.0247e-04
Loss = 3.9561e-01, PNorm = 58.8587, GNorm = 1.3737, lr_0 = 4.0219e-04
Loss = 3.7568e-01, PNorm = 58.8705, GNorm = 1.1531, lr_0 = 4.0192e-04
Loss = 3.8853e-01, PNorm = 58.8746, GNorm = 1.3661, lr_0 = 4.0164e-04
Loss = 3.7981e-01, PNorm = 58.8795, GNorm = 0.9838, lr_0 = 4.0137e-04
Loss = 3.8688e-01, PNorm = 58.8814, GNorm = 1.3448, lr_0 = 4.0109e-04
Loss = 4.1275e-01, PNorm = 58.8837, GNorm = 2.1615, lr_0 = 4.0082e-04
Loss = 3.6468e-01, PNorm = 58.8853, GNorm = 1.8663, lr_0 = 4.0054e-04
Loss = 3.7149e-01, PNorm = 58.8922, GNorm = 1.5271, lr_0 = 4.0027e-04
Loss = 3.7118e-01, PNorm = 58.8959, GNorm = 0.9873, lr_0 = 3.9999e-04
Loss = 3.7575e-01, PNorm = 58.9027, GNorm = 1.3926, lr_0 = 3.9972e-04
Loss = 3.6092e-01, PNorm = 58.9112, GNorm = 1.4282, lr_0 = 3.9945e-04
Loss = 3.8697e-01, PNorm = 58.9166, GNorm = 1.1575, lr_0 = 3.9917e-04
Loss = 4.9363e-01, PNorm = 58.9230, GNorm = 1.2258, lr_0 = 3.9890e-04
Loss = 3.6572e-01, PNorm = 58.9284, GNorm = 1.4355, lr_0 = 3.9863e-04
Loss = 3.7864e-01, PNorm = 58.9363, GNorm = 1.1304, lr_0 = 3.9835e-04
Loss = 3.4622e-01, PNorm = 58.9456, GNorm = 1.2528, lr_0 = 3.9808e-04
Loss = 3.1594e-01, PNorm = 58.9522, GNorm = 1.6078, lr_0 = 3.9781e-04
Loss = 4.2543e-01, PNorm = 58.9546, GNorm = 1.3969, lr_0 = 3.9753e-04
Loss = 4.0183e-01, PNorm = 58.9600, GNorm = 1.1252, lr_0 = 3.9726e-04
Loss = 4.2205e-01, PNorm = 58.9699, GNorm = 1.3324, lr_0 = 3.9699e-04
Loss = 3.7927e-01, PNorm = 58.9749, GNorm = 1.7436, lr_0 = 3.9672e-04
Loss = 3.7293e-01, PNorm = 58.9839, GNorm = 1.2726, lr_0 = 3.9645e-04
Loss = 4.6759e-01, PNorm = 58.9915, GNorm = 1.9653, lr_0 = 3.9617e-04
Loss = 3.7221e-01, PNorm = 58.9974, GNorm = 2.1168, lr_0 = 3.9590e-04
Loss = 3.8992e-01, PNorm = 58.9970, GNorm = 1.9786, lr_0 = 3.9563e-04
Loss = 4.0638e-01, PNorm = 59.0052, GNorm = 1.2410, lr_0 = 3.9536e-04
Loss = 4.1328e-01, PNorm = 59.0104, GNorm = 1.8557, lr_0 = 3.9509e-04
Loss = 4.1583e-01, PNorm = 59.0187, GNorm = 0.9302, lr_0 = 3.9482e-04
Loss = 3.5883e-01, PNorm = 59.0242, GNorm = 1.5988, lr_0 = 3.9455e-04
Loss = 3.6549e-01, PNorm = 59.0315, GNorm = 1.2874, lr_0 = 3.9428e-04
Loss = 3.6576e-01, PNorm = 59.0414, GNorm = 1.4067, lr_0 = 3.9401e-04
Loss = 3.9434e-01, PNorm = 59.0474, GNorm = 0.9374, lr_0 = 3.9374e-04
Loss = 4.0487e-01, PNorm = 59.0497, GNorm = 2.1500, lr_0 = 3.9347e-04
Loss = 4.3806e-01, PNorm = 59.0540, GNorm = 2.2435, lr_0 = 3.9320e-04
Loss = 3.9785e-01, PNorm = 59.0587, GNorm = 2.1011, lr_0 = 3.9293e-04
Loss = 3.9858e-01, PNorm = 59.0635, GNorm = 1.5529, lr_0 = 3.9266e-04
Loss = 4.3450e-01, PNorm = 59.0750, GNorm = 2.0345, lr_0 = 3.9239e-04
Loss = 3.5725e-01, PNorm = 59.0771, GNorm = 1.9807, lr_0 = 3.9212e-04
Loss = 4.5307e-01, PNorm = 59.0823, GNorm = 1.3340, lr_0 = 3.9185e-04
Loss = 4.4233e-01, PNorm = 59.0900, GNorm = 1.3491, lr_0 = 3.9159e-04
Loss = 3.6260e-01, PNorm = 59.0984, GNorm = 1.4575, lr_0 = 3.9132e-04
Loss = 4.2085e-01, PNorm = 59.1060, GNorm = 2.6196, lr_0 = 3.9105e-04
Loss = 3.8557e-01, PNorm = 59.1114, GNorm = 1.5246, lr_0 = 3.9078e-04
Loss = 4.2390e-01, PNorm = 59.1122, GNorm = 1.5676, lr_0 = 3.9051e-04
Loss = 3.9744e-01, PNorm = 59.1162, GNorm = 1.3859, lr_0 = 3.9025e-04
Loss = 3.9405e-01, PNorm = 59.1200, GNorm = 0.9754, lr_0 = 3.8998e-04
Loss = 3.9343e-01, PNorm = 59.1271, GNorm = 1.3315, lr_0 = 3.8971e-04
Loss = 4.1696e-01, PNorm = 59.1313, GNorm = 1.3576, lr_0 = 3.8945e-04
Loss = 3.7675e-01, PNorm = 59.1373, GNorm = 0.9915, lr_0 = 3.8918e-04
Loss = 3.7794e-01, PNorm = 59.1458, GNorm = 1.2592, lr_0 = 3.8891e-04
Loss = 3.7556e-01, PNorm = 59.1526, GNorm = 1.5115, lr_0 = 3.8865e-04
Loss = 3.9392e-01, PNorm = 59.1581, GNorm = 1.4145, lr_0 = 3.8838e-04
Loss = 4.2628e-01, PNorm = 59.1577, GNorm = 1.2680, lr_0 = 3.8811e-04
Loss = 3.9524e-01, PNorm = 59.1649, GNorm = 1.7628, lr_0 = 3.8785e-04
Loss = 4.3563e-01, PNorm = 59.1745, GNorm = 1.8611, lr_0 = 3.8758e-04
Loss = 2.9997e-01, PNorm = 59.1838, GNorm = 1.3232, lr_0 = 3.8732e-04
Loss = 4.0685e-01, PNorm = 59.1886, GNorm = 0.9428, lr_0 = 3.8705e-04
Loss = 3.6396e-01, PNorm = 59.1914, GNorm = 1.3700, lr_0 = 3.8679e-04
Loss = 3.8915e-01, PNorm = 59.1961, GNorm = 1.1832, lr_0 = 3.8652e-04
Loss = 3.8663e-01, PNorm = 59.2040, GNorm = 1.9773, lr_0 = 3.8626e-04
Loss = 3.8115e-01, PNorm = 59.2096, GNorm = 1.0221, lr_0 = 3.8599e-04
Loss = 3.8659e-01, PNorm = 59.2124, GNorm = 1.5038, lr_0 = 3.8573e-04
Loss = 3.4439e-01, PNorm = 59.2151, GNorm = 1.5399, lr_0 = 3.8546e-04
Loss = 4.1125e-01, PNorm = 59.2226, GNorm = 1.3665, lr_0 = 3.8520e-04
Loss = 3.9043e-01, PNorm = 59.2216, GNorm = 1.5997, lr_0 = 3.8493e-04
Loss = 4.0563e-01, PNorm = 59.2296, GNorm = 1.3663, lr_0 = 3.8467e-04
Loss = 3.6965e-01, PNorm = 59.2322, GNorm = 1.0080, lr_0 = 3.8441e-04
Loss = 3.9321e-01, PNorm = 59.2392, GNorm = 1.2417, lr_0 = 3.8414e-04
Loss = 4.1649e-01, PNorm = 59.2423, GNorm = 1.3463, lr_0 = 3.8388e-04
Loss = 4.3135e-01, PNorm = 59.2484, GNorm = 1.7074, lr_0 = 3.8362e-04
Loss = 3.7412e-01, PNorm = 59.2535, GNorm = 2.0047, lr_0 = 3.8336e-04
Loss = 4.2975e-01, PNorm = 59.2598, GNorm = 1.3110, lr_0 = 3.8309e-04
Loss = 3.6733e-01, PNorm = 59.2689, GNorm = 1.3014, lr_0 = 3.8283e-04
Loss = 3.7067e-01, PNorm = 59.2731, GNorm = 1.7763, lr_0 = 3.8257e-04
Loss = 3.5411e-01, PNorm = 59.2829, GNorm = 0.9799, lr_0 = 3.8231e-04
Loss = 3.9570e-01, PNorm = 59.2802, GNorm = 1.4318, lr_0 = 3.8204e-04
Loss = 3.7156e-01, PNorm = 59.2890, GNorm = 1.0399, lr_0 = 3.8178e-04
Loss = 3.6928e-01, PNorm = 59.2965, GNorm = 1.8983, lr_0 = 3.8152e-04
Loss = 4.1276e-01, PNorm = 59.2981, GNorm = 1.5073, lr_0 = 3.8126e-04
Loss = 3.6441e-01, PNorm = 59.3087, GNorm = 1.8039, lr_0 = 3.8100e-04
Loss = 4.1098e-01, PNorm = 59.3078, GNorm = 1.4800, lr_0 = 3.8074e-04
Loss = 4.0108e-01, PNorm = 59.3196, GNorm = 1.8978, lr_0 = 3.8048e-04
Loss = 4.1181e-01, PNorm = 59.3280, GNorm = 1.7216, lr_0 = 3.8022e-04
Loss = 4.2026e-01, PNorm = 59.3369, GNorm = 1.8395, lr_0 = 3.7995e-04
Loss = 4.0645e-01, PNorm = 59.3420, GNorm = 1.5845, lr_0 = 3.7969e-04
Loss = 3.6861e-01, PNorm = 59.3467, GNorm = 1.1868, lr_0 = 3.7943e-04
Loss = 3.9078e-01, PNorm = 59.3457, GNorm = 1.3658, lr_0 = 3.7917e-04
Loss = 4.8389e-01, PNorm = 59.3528, GNorm = 1.7497, lr_0 = 3.7891e-04
Loss = 3.6590e-01, PNorm = 59.3600, GNorm = 1.3087, lr_0 = 3.7866e-04
Loss = 4.1431e-01, PNorm = 59.3648, GNorm = 1.7704, lr_0 = 3.7840e-04
Loss = 3.8335e-01, PNorm = 59.3706, GNorm = 1.2961, lr_0 = 3.7814e-04
Loss = 3.8727e-01, PNorm = 59.3801, GNorm = 1.4812, lr_0 = 3.7788e-04
Loss = 4.4529e-01, PNorm = 59.3849, GNorm = 1.7898, lr_0 = 3.7762e-04
Loss = 3.8734e-01, PNorm = 59.3924, GNorm = 1.2778, lr_0 = 3.7736e-04
Loss = 4.3901e-01, PNorm = 59.3957, GNorm = 1.5123, lr_0 = 3.7710e-04
Loss = 4.2164e-01, PNorm = 59.3993, GNorm = 1.6535, lr_0 = 3.7684e-04
Loss = 5.0337e-01, PNorm = 59.4038, GNorm = 1.7398, lr_0 = 3.7659e-04
Loss = 3.7211e-01, PNorm = 59.4136, GNorm = 1.1672, lr_0 = 3.7633e-04
Loss = 4.0520e-01, PNorm = 59.4243, GNorm = 1.4540, lr_0 = 3.7607e-04
Loss = 4.3914e-01, PNorm = 59.4296, GNorm = 1.3519, lr_0 = 3.7581e-04
Loss = 3.7821e-01, PNorm = 59.4344, GNorm = 1.7045, lr_0 = 3.7555e-04
Loss = 3.8364e-01, PNorm = 59.4357, GNorm = 1.8055, lr_0 = 3.7530e-04
Loss = 3.9689e-01, PNorm = 59.4403, GNorm = 1.9781, lr_0 = 3.7504e-04
Loss = 4.5776e-01, PNorm = 59.4441, GNorm = 1.3442, lr_0 = 3.7478e-04
Loss = 4.3271e-01, PNorm = 59.4456, GNorm = 1.0470, lr_0 = 3.7453e-04
Loss = 4.3377e-01, PNorm = 59.4422, GNorm = 1.6389, lr_0 = 3.7427e-04
Loss = 3.8906e-01, PNorm = 59.4446, GNorm = 1.6388, lr_0 = 3.7401e-04
Loss = 3.8226e-01, PNorm = 59.4484, GNorm = 1.1175, lr_0 = 3.7376e-04
Loss = 3.7836e-01, PNorm = 59.4533, GNorm = 1.1562, lr_0 = 3.7350e-04
Loss = 4.0133e-01, PNorm = 59.4564, GNorm = 1.4030, lr_0 = 3.7325e-04
Loss = 3.6494e-01, PNorm = 59.4640, GNorm = 1.0195, lr_0 = 3.7299e-04
Loss = 3.7978e-01, PNorm = 59.4713, GNorm = 2.0788, lr_0 = 3.7273e-04
Validation mae = 0.113557
Epoch 14
Loss = 3.9053e-01, PNorm = 59.4771, GNorm = 1.0079, lr_0 = 3.7248e-04
Loss = 3.5294e-01, PNorm = 59.4845, GNorm = 1.6475, lr_0 = 3.7222e-04
Loss = 3.8659e-01, PNorm = 59.4922, GNorm = 0.9693, lr_0 = 3.7197e-04
Loss = 3.6399e-01, PNorm = 59.5016, GNorm = 1.3644, lr_0 = 3.7171e-04
Loss = 3.9408e-01, PNorm = 59.5053, GNorm = 1.4702, lr_0 = 3.7146e-04
Loss = 4.3533e-01, PNorm = 59.5094, GNorm = 1.3077, lr_0 = 3.7120e-04
Loss = 4.1505e-01, PNorm = 59.5135, GNorm = 0.9695, lr_0 = 3.7095e-04
Loss = 4.3826e-01, PNorm = 59.5214, GNorm = 1.4651, lr_0 = 3.7070e-04
Loss = 3.8296e-01, PNorm = 59.5292, GNorm = 1.9957, lr_0 = 3.7044e-04
Loss = 3.7346e-01, PNorm = 59.5398, GNorm = 1.1502, lr_0 = 3.7019e-04
Loss = 4.6825e-01, PNorm = 59.5441, GNorm = 1.4197, lr_0 = 3.6993e-04
Loss = 3.2642e-01, PNorm = 59.5528, GNorm = 1.3053, lr_0 = 3.6968e-04
Loss = 4.3552e-01, PNorm = 59.5562, GNorm = 1.7100, lr_0 = 3.6943e-04
Loss = 4.1858e-01, PNorm = 59.5602, GNorm = 2.2300, lr_0 = 3.6917e-04
Loss = 4.1239e-01, PNorm = 59.5612, GNorm = 1.2474, lr_0 = 3.6892e-04
Loss = 4.2184e-01, PNorm = 59.5610, GNorm = 2.1594, lr_0 = 3.6867e-04
Loss = 3.8497e-01, PNorm = 59.5691, GNorm = 1.1220, lr_0 = 3.6842e-04
Loss = 4.0228e-01, PNorm = 59.5716, GNorm = 1.3204, lr_0 = 3.6816e-04
Loss = 3.7934e-01, PNorm = 59.5748, GNorm = 1.4260, lr_0 = 3.6791e-04
Loss = 4.1198e-01, PNorm = 59.5826, GNorm = 1.5124, lr_0 = 3.6766e-04
Loss = 3.7953e-01, PNorm = 59.5927, GNorm = 1.8016, lr_0 = 3.6741e-04
Loss = 4.3047e-01, PNorm = 59.5939, GNorm = 1.6440, lr_0 = 3.6716e-04
Loss = 4.1784e-01, PNorm = 59.6037, GNorm = 1.1315, lr_0 = 3.6690e-04
Loss = 3.6601e-01, PNorm = 59.6069, GNorm = 1.3257, lr_0 = 3.6665e-04
Loss = 3.8972e-01, PNorm = 59.6130, GNorm = 1.5817, lr_0 = 3.6640e-04
Loss = 4.2280e-01, PNorm = 59.6225, GNorm = 1.3010, lr_0 = 3.6615e-04
Loss = 3.8143e-01, PNorm = 59.6264, GNorm = 1.4970, lr_0 = 3.6590e-04
Loss = 3.8640e-01, PNorm = 59.6331, GNorm = 1.2043, lr_0 = 3.6565e-04
Loss = 3.8302e-01, PNorm = 59.6399, GNorm = 1.1681, lr_0 = 3.6540e-04
Loss = 3.7333e-01, PNorm = 59.6495, GNorm = 1.0584, lr_0 = 3.6515e-04
Loss = 3.6724e-01, PNorm = 59.6593, GNorm = 1.5893, lr_0 = 3.6490e-04
Loss = 4.1893e-01, PNorm = 59.6632, GNorm = 1.8934, lr_0 = 3.6465e-04
Loss = 3.4493e-01, PNorm = 59.6664, GNorm = 1.0242, lr_0 = 3.6440e-04
Loss = 4.3665e-01, PNorm = 59.6707, GNorm = 2.1747, lr_0 = 3.6415e-04
Loss = 3.4623e-01, PNorm = 59.6772, GNorm = 1.5532, lr_0 = 3.6390e-04
Loss = 3.8600e-01, PNorm = 59.6874, GNorm = 1.0688, lr_0 = 3.6365e-04
Loss = 3.7983e-01, PNorm = 59.6938, GNorm = 1.5274, lr_0 = 3.6340e-04
Loss = 4.0845e-01, PNorm = 59.6945, GNorm = 2.1959, lr_0 = 3.6315e-04
Loss = 4.1965e-01, PNorm = 59.6967, GNorm = 1.6692, lr_0 = 3.6290e-04
Loss = 3.6460e-01, PNorm = 59.6962, GNorm = 1.1576, lr_0 = 3.6266e-04
Loss = 3.8903e-01, PNorm = 59.7000, GNorm = 1.2121, lr_0 = 3.6241e-04
Loss = 3.7821e-01, PNorm = 59.7068, GNorm = 1.7275, lr_0 = 3.6216e-04
Loss = 4.2578e-01, PNorm = 59.7110, GNorm = 1.3992, lr_0 = 3.6191e-04
Loss = 4.2561e-01, PNorm = 59.7132, GNorm = 1.7131, lr_0 = 3.6166e-04
Loss = 3.4181e-01, PNorm = 59.7150, GNorm = 1.4578, lr_0 = 3.6141e-04
Loss = 3.4559e-01, PNorm = 59.7184, GNorm = 1.1423, lr_0 = 3.6117e-04
Loss = 3.7183e-01, PNorm = 59.7220, GNorm = 1.2002, lr_0 = 3.6092e-04
Loss = 3.8449e-01, PNorm = 59.7262, GNorm = 0.8217, lr_0 = 3.6067e-04
Loss = 3.7362e-01, PNorm = 59.7287, GNorm = 1.7464, lr_0 = 3.6043e-04
Loss = 3.6415e-01, PNorm = 59.7348, GNorm = 1.3008, lr_0 = 3.6018e-04
Loss = 3.8003e-01, PNorm = 59.7417, GNorm = 1.6327, lr_0 = 3.5993e-04
Loss = 3.6226e-01, PNorm = 59.7476, GNorm = 1.0159, lr_0 = 3.5969e-04
Loss = 3.9620e-01, PNorm = 59.7514, GNorm = 1.2026, lr_0 = 3.5944e-04
Loss = 3.9180e-01, PNorm = 59.7536, GNorm = 1.5415, lr_0 = 3.5919e-04
Loss = 4.2702e-01, PNorm = 59.7571, GNorm = 1.3982, lr_0 = 3.5895e-04
Loss = 3.9077e-01, PNorm = 59.7656, GNorm = 0.7253, lr_0 = 3.5870e-04
Loss = 4.1168e-01, PNorm = 59.7746, GNorm = 1.8129, lr_0 = 3.5845e-04
Loss = 3.1881e-01, PNorm = 59.7787, GNorm = 1.1388, lr_0 = 3.5821e-04
Loss = 3.7169e-01, PNorm = 59.7793, GNorm = 1.6834, lr_0 = 3.5796e-04
Loss = 3.6678e-01, PNorm = 59.7785, GNorm = 1.4266, lr_0 = 3.5772e-04
Loss = 4.4321e-01, PNorm = 59.7816, GNorm = 1.7956, lr_0 = 3.5747e-04
Loss = 3.8240e-01, PNorm = 59.7826, GNorm = 1.8107, lr_0 = 3.5723e-04
Loss = 3.8999e-01, PNorm = 59.7822, GNorm = 1.3633, lr_0 = 3.5698e-04
Loss = 3.9442e-01, PNorm = 59.7824, GNorm = 1.0779, lr_0 = 3.5674e-04
Loss = 3.9938e-01, PNorm = 59.7928, GNorm = 1.1697, lr_0 = 3.5650e-04
Loss = 3.9883e-01, PNorm = 59.8006, GNorm = 1.4091, lr_0 = 3.5625e-04
Loss = 4.0339e-01, PNorm = 59.8061, GNorm = 1.3535, lr_0 = 3.5601e-04
Loss = 3.8346e-01, PNorm = 59.8084, GNorm = 1.0848, lr_0 = 3.5576e-04
Loss = 3.9571e-01, PNorm = 59.8116, GNorm = 1.2611, lr_0 = 3.5552e-04
Loss = 3.8108e-01, PNorm = 59.8182, GNorm = 1.3991, lr_0 = 3.5528e-04
Loss = 4.0407e-01, PNorm = 59.8220, GNorm = 1.4759, lr_0 = 3.5503e-04
Loss = 3.5104e-01, PNorm = 59.8281, GNorm = 1.2904, lr_0 = 3.5479e-04
Loss = 4.0710e-01, PNorm = 59.8308, GNorm = 1.3715, lr_0 = 3.5455e-04
Loss = 3.6797e-01, PNorm = 59.8346, GNorm = 1.1261, lr_0 = 3.5430e-04
Loss = 3.9841e-01, PNorm = 59.8386, GNorm = 1.4737, lr_0 = 3.5406e-04
Loss = 3.7995e-01, PNorm = 59.8434, GNorm = 1.1684, lr_0 = 3.5382e-04
Loss = 3.5037e-01, PNorm = 59.8485, GNorm = 1.9143, lr_0 = 3.5358e-04
Loss = 3.8154e-01, PNorm = 59.8511, GNorm = 1.4825, lr_0 = 3.5333e-04
Loss = 4.1220e-01, PNorm = 59.8516, GNorm = 1.1720, lr_0 = 3.5309e-04
Loss = 4.2982e-01, PNorm = 59.8552, GNorm = 1.1114, lr_0 = 3.5285e-04
Loss = 3.7996e-01, PNorm = 59.8654, GNorm = 1.2576, lr_0 = 3.5261e-04
Loss = 3.5988e-01, PNorm = 59.8687, GNorm = 1.5444, lr_0 = 3.5237e-04
Loss = 3.6448e-01, PNorm = 59.8709, GNorm = 1.3765, lr_0 = 3.5212e-04
Loss = 4.1548e-01, PNorm = 59.8750, GNorm = 1.0056, lr_0 = 3.5188e-04
Loss = 3.7387e-01, PNorm = 59.8805, GNorm = 1.5120, lr_0 = 3.5164e-04
Loss = 3.5105e-01, PNorm = 59.8855, GNorm = 1.0679, lr_0 = 3.5140e-04
Loss = 3.6328e-01, PNorm = 59.8887, GNorm = 1.0568, lr_0 = 3.5116e-04
Loss = 3.4053e-01, PNorm = 59.8960, GNorm = 1.1117, lr_0 = 3.5092e-04
Loss = 4.2887e-01, PNorm = 59.9004, GNorm = 1.6099, lr_0 = 3.5068e-04
Loss = 4.1838e-01, PNorm = 59.9023, GNorm = 1.7351, lr_0 = 3.5044e-04
Loss = 3.7754e-01, PNorm = 59.9105, GNorm = 1.2869, lr_0 = 3.5020e-04
Loss = 4.4762e-01, PNorm = 59.9175, GNorm = 1.4178, lr_0 = 3.4996e-04
Loss = 4.0438e-01, PNorm = 59.9231, GNorm = 0.8838, lr_0 = 3.4972e-04
Loss = 3.6667e-01, PNorm = 59.9271, GNorm = 0.9934, lr_0 = 3.4948e-04
Loss = 3.7708e-01, PNorm = 59.9330, GNorm = 1.2520, lr_0 = 3.4924e-04
Loss = 4.6844e-01, PNorm = 59.9364, GNorm = 1.2751, lr_0 = 3.4900e-04
Loss = 3.4675e-01, PNorm = 59.9434, GNorm = 0.9430, lr_0 = 3.4876e-04
Loss = 3.6294e-01, PNorm = 59.9455, GNorm = 1.0430, lr_0 = 3.4852e-04
Loss = 3.9869e-01, PNorm = 59.9504, GNorm = 1.2824, lr_0 = 3.4828e-04
Loss = 3.5003e-01, PNorm = 59.9536, GNorm = 0.9817, lr_0 = 3.4805e-04
Loss = 3.3270e-01, PNorm = 59.9623, GNorm = 1.3783, lr_0 = 3.4781e-04
Loss = 3.9642e-01, PNorm = 59.9667, GNorm = 1.3389, lr_0 = 3.4757e-04
Loss = 3.6027e-01, PNorm = 59.9696, GNorm = 0.8852, lr_0 = 3.4733e-04
Loss = 3.5221e-01, PNorm = 59.9738, GNorm = 1.3491, lr_0 = 3.4709e-04
Loss = 4.3551e-01, PNorm = 59.9799, GNorm = 1.6709, lr_0 = 3.4686e-04
Loss = 3.9081e-01, PNorm = 59.9797, GNorm = 1.7351, lr_0 = 3.4662e-04
Loss = 4.7248e-01, PNorm = 59.9833, GNorm = 1.2540, lr_0 = 3.4638e-04
Loss = 3.7736e-01, PNorm = 59.9887, GNorm = 1.2827, lr_0 = 3.4614e-04
Loss = 4.1005e-01, PNorm = 59.9928, GNorm = 1.0756, lr_0 = 3.4591e-04
Loss = 3.2471e-01, PNorm = 59.9993, GNorm = 1.4901, lr_0 = 3.4567e-04
Loss = 3.8244e-01, PNorm = 60.0019, GNorm = 2.1532, lr_0 = 3.4543e-04
Loss = 3.5601e-01, PNorm = 60.0093, GNorm = 1.2614, lr_0 = 3.4520e-04
Loss = 4.1884e-01, PNorm = 60.0075, GNorm = 1.0387, lr_0 = 3.4496e-04
Loss = 3.8881e-01, PNorm = 60.0104, GNorm = 1.2245, lr_0 = 3.4472e-04
Loss = 3.4514e-01, PNorm = 60.0186, GNorm = 1.1842, lr_0 = 3.4449e-04
Loss = 3.8611e-01, PNorm = 60.0216, GNorm = 1.5130, lr_0 = 3.4425e-04
Loss = 3.3928e-01, PNorm = 60.0283, GNorm = 1.7607, lr_0 = 3.4402e-04
Loss = 3.6516e-01, PNorm = 60.0313, GNorm = 1.1096, lr_0 = 3.4378e-04
Loss = 4.6802e-01, PNorm = 60.0378, GNorm = 2.1870, lr_0 = 3.4354e-04
Loss = 3.4664e-01, PNorm = 60.0489, GNorm = 1.4927, lr_0 = 3.4331e-04
Validation mae = 0.112859
Epoch 15
Loss = 3.9157e-01, PNorm = 60.0551, GNorm = 1.3727, lr_0 = 3.4307e-04
Loss = 3.9338e-01, PNorm = 60.0603, GNorm = 1.9542, lr_0 = 3.4284e-04
Loss = 4.2897e-01, PNorm = 60.0673, GNorm = 1.3949, lr_0 = 3.4260e-04
Loss = 3.7148e-01, PNorm = 60.0687, GNorm = 1.3598, lr_0 = 3.4237e-04
Loss = 4.1713e-01, PNorm = 60.0722, GNorm = 1.1309, lr_0 = 3.4213e-04
Loss = 3.5483e-01, PNorm = 60.0768, GNorm = 0.9027, lr_0 = 3.4190e-04
Loss = 3.5677e-01, PNorm = 60.0826, GNorm = 1.2228, lr_0 = 3.4167e-04
Loss = 4.0691e-01, PNorm = 60.0845, GNorm = 1.1422, lr_0 = 3.4143e-04
Loss = 3.6019e-01, PNorm = 60.0872, GNorm = 1.1380, lr_0 = 3.4120e-04
Loss = 3.9439e-01, PNorm = 60.0934, GNorm = 1.1415, lr_0 = 3.4096e-04
Loss = 4.2666e-01, PNorm = 60.1007, GNorm = 1.5063, lr_0 = 3.4073e-04
Loss = 4.2158e-01, PNorm = 60.1043, GNorm = 1.3386, lr_0 = 3.4050e-04
Loss = 3.6269e-01, PNorm = 60.1068, GNorm = 1.1748, lr_0 = 3.4026e-04
Loss = 3.5021e-01, PNorm = 60.1086, GNorm = 0.8065, lr_0 = 3.4003e-04
Loss = 3.7133e-01, PNorm = 60.1140, GNorm = 1.1991, lr_0 = 3.3980e-04
Loss = 3.5714e-01, PNorm = 60.1212, GNorm = 1.8888, lr_0 = 3.3956e-04
Loss = 3.4473e-01, PNorm = 60.1292, GNorm = 1.7299, lr_0 = 3.3933e-04
Loss = 3.6131e-01, PNorm = 60.1333, GNorm = 1.1407, lr_0 = 3.3910e-04
Loss = 4.1098e-01, PNorm = 60.1364, GNorm = 1.2901, lr_0 = 3.3887e-04
Loss = 3.7864e-01, PNorm = 60.1401, GNorm = 1.8101, lr_0 = 3.3864e-04
Loss = 4.1742e-01, PNorm = 60.1418, GNorm = 1.7500, lr_0 = 3.3840e-04
Loss = 3.9421e-01, PNorm = 60.1458, GNorm = 0.8985, lr_0 = 3.3817e-04
Loss = 4.0292e-01, PNorm = 60.1518, GNorm = 1.9854, lr_0 = 3.3794e-04
Loss = 3.6826e-01, PNorm = 60.1522, GNorm = 1.8071, lr_0 = 3.3771e-04
Loss = 3.8359e-01, PNorm = 60.1531, GNorm = 1.7124, lr_0 = 3.3748e-04
Loss = 3.8082e-01, PNorm = 60.1588, GNorm = 1.4453, lr_0 = 3.3725e-04
Loss = 4.0021e-01, PNorm = 60.1650, GNorm = 1.2277, lr_0 = 3.3701e-04
Loss = 4.1043e-01, PNorm = 60.1727, GNorm = 1.2485, lr_0 = 3.3678e-04
Loss = 3.9289e-01, PNorm = 60.1760, GNorm = 1.6100, lr_0 = 3.3655e-04
Loss = 4.1424e-01, PNorm = 60.1800, GNorm = 1.3605, lr_0 = 3.3632e-04
Loss = 3.9459e-01, PNorm = 60.1866, GNorm = 1.6004, lr_0 = 3.3609e-04
Loss = 3.1899e-01, PNorm = 60.1891, GNorm = 1.2631, lr_0 = 3.3586e-04
Loss = 3.7030e-01, PNorm = 60.1898, GNorm = 1.2806, lr_0 = 3.3563e-04
Loss = 3.5807e-01, PNorm = 60.1950, GNorm = 0.8811, lr_0 = 3.3540e-04
Loss = 3.7895e-01, PNorm = 60.2009, GNorm = 1.4740, lr_0 = 3.3517e-04
Loss = 4.0330e-01, PNorm = 60.2048, GNorm = 1.6808, lr_0 = 3.3494e-04
Loss = 3.7388e-01, PNorm = 60.2082, GNorm = 1.1034, lr_0 = 3.3471e-04
Loss = 3.7656e-01, PNorm = 60.2074, GNorm = 1.2386, lr_0 = 3.3448e-04
Loss = 3.7788e-01, PNorm = 60.2125, GNorm = 1.5340, lr_0 = 3.3425e-04
Loss = 3.7168e-01, PNorm = 60.2196, GNorm = 2.0482, lr_0 = 3.3403e-04
Loss = 3.8677e-01, PNorm = 60.2235, GNorm = 1.1103, lr_0 = 3.3380e-04
Loss = 3.8688e-01, PNorm = 60.2281, GNorm = 1.1186, lr_0 = 3.3357e-04
Loss = 4.1943e-01, PNorm = 60.2374, GNorm = 2.0303, lr_0 = 3.3334e-04
Loss = 4.4233e-01, PNorm = 60.2422, GNorm = 1.8908, lr_0 = 3.3311e-04
Loss = 3.5959e-01, PNorm = 60.2461, GNorm = 1.5932, lr_0 = 3.3288e-04
Loss = 3.8680e-01, PNorm = 60.2521, GNorm = 1.1120, lr_0 = 3.3265e-04
Loss = 4.0365e-01, PNorm = 60.2554, GNorm = 2.0387, lr_0 = 3.3243e-04
Loss = 4.0150e-01, PNorm = 60.2581, GNorm = 2.4200, lr_0 = 3.3220e-04
Loss = 3.9870e-01, PNorm = 60.2604, GNorm = 1.3339, lr_0 = 3.3197e-04
Loss = 4.3509e-01, PNorm = 60.2669, GNorm = 1.5962, lr_0 = 3.3174e-04
Loss = 3.8581e-01, PNorm = 60.2727, GNorm = 1.1224, lr_0 = 3.3152e-04
Loss = 3.7498e-01, PNorm = 60.2786, GNorm = 1.5174, lr_0 = 3.3129e-04
Loss = 3.7084e-01, PNorm = 60.2844, GNorm = 1.2834, lr_0 = 3.3106e-04
Loss = 4.6231e-01, PNorm = 60.2888, GNorm = 1.2051, lr_0 = 3.3084e-04
Loss = 3.7969e-01, PNorm = 60.2949, GNorm = 1.2103, lr_0 = 3.3061e-04
Loss = 3.7770e-01, PNorm = 60.2990, GNorm = 1.6517, lr_0 = 3.3038e-04
Loss = 3.6485e-01, PNorm = 60.3028, GNorm = 1.1789, lr_0 = 3.3016e-04
Loss = 3.4188e-01, PNorm = 60.3082, GNorm = 1.1915, lr_0 = 3.2993e-04
Loss = 4.3831e-01, PNorm = 60.3140, GNorm = 1.3488, lr_0 = 3.2970e-04
Loss = 3.6661e-01, PNorm = 60.3217, GNorm = 1.5686, lr_0 = 3.2948e-04
Loss = 3.3258e-01, PNorm = 60.3258, GNorm = 1.2893, lr_0 = 3.2925e-04
Loss = 3.6607e-01, PNorm = 60.3291, GNorm = 1.3606, lr_0 = 3.2903e-04
Loss = 3.8152e-01, PNorm = 60.3298, GNorm = 1.3982, lr_0 = 3.2880e-04
Loss = 3.9911e-01, PNorm = 60.3337, GNorm = 1.3271, lr_0 = 3.2858e-04
Loss = 3.4632e-01, PNorm = 60.3418, GNorm = 2.4851, lr_0 = 3.2835e-04
Loss = 3.4547e-01, PNorm = 60.3413, GNorm = 2.0334, lr_0 = 3.2813e-04
Loss = 4.0293e-01, PNorm = 60.3477, GNorm = 2.1743, lr_0 = 3.2790e-04
Loss = 4.0249e-01, PNorm = 60.3486, GNorm = 1.3052, lr_0 = 3.2768e-04
Loss = 3.6446e-01, PNorm = 60.3528, GNorm = 1.2691, lr_0 = 3.2745e-04
Loss = 4.2535e-01, PNorm = 60.3584, GNorm = 1.1892, lr_0 = 3.2723e-04
Loss = 3.6667e-01, PNorm = 60.3645, GNorm = 1.1891, lr_0 = 3.2700e-04
Loss = 3.8610e-01, PNorm = 60.3693, GNorm = 2.0886, lr_0 = 3.2678e-04
Loss = 4.1356e-01, PNorm = 60.3793, GNorm = 3.8146, lr_0 = 3.2656e-04
Loss = 3.6278e-01, PNorm = 60.3839, GNorm = 1.2588, lr_0 = 3.2633e-04
Loss = 3.6730e-01, PNorm = 60.3917, GNorm = 1.3068, lr_0 = 3.2611e-04
Loss = 3.4663e-01, PNorm = 60.3940, GNorm = 1.3751, lr_0 = 3.2589e-04
Loss = 3.6240e-01, PNorm = 60.3992, GNorm = 0.9435, lr_0 = 3.2566e-04
Loss = 3.9524e-01, PNorm = 60.4036, GNorm = 1.9012, lr_0 = 3.2544e-04
Loss = 3.7588e-01, PNorm = 60.4078, GNorm = 0.9904, lr_0 = 3.2522e-04
Loss = 3.7797e-01, PNorm = 60.4131, GNorm = 1.3738, lr_0 = 3.2499e-04
Loss = 4.0113e-01, PNorm = 60.4152, GNorm = 1.3340, lr_0 = 3.2477e-04
Loss = 4.0194e-01, PNorm = 60.4187, GNorm = 1.8830, lr_0 = 3.2455e-04
Loss = 3.8272e-01, PNorm = 60.4204, GNorm = 2.0514, lr_0 = 3.2433e-04
Loss = 4.0598e-01, PNorm = 60.4286, GNorm = 1.3936, lr_0 = 3.2410e-04
Loss = 3.6539e-01, PNorm = 60.4321, GNorm = 1.4866, lr_0 = 3.2388e-04
Loss = 3.8776e-01, PNorm = 60.4384, GNorm = 1.2212, lr_0 = 3.2366e-04
Loss = 4.3422e-01, PNorm = 60.4459, GNorm = 1.2030, lr_0 = 3.2344e-04
Loss = 3.3281e-01, PNorm = 60.4530, GNorm = 1.2733, lr_0 = 3.2322e-04
Loss = 3.6370e-01, PNorm = 60.4560, GNorm = 1.3915, lr_0 = 3.2300e-04
Loss = 4.6467e-01, PNorm = 60.4570, GNorm = 1.2614, lr_0 = 3.2277e-04
Loss = 3.9905e-01, PNorm = 60.4622, GNorm = 1.6551, lr_0 = 3.2255e-04
Loss = 4.0025e-01, PNorm = 60.4629, GNorm = 1.3636, lr_0 = 3.2233e-04
Loss = 4.4201e-01, PNorm = 60.4651, GNorm = 2.0580, lr_0 = 3.2211e-04
Loss = 4.0060e-01, PNorm = 60.4743, GNorm = 1.4896, lr_0 = 3.2189e-04
Loss = 3.5065e-01, PNorm = 60.4826, GNorm = 1.7282, lr_0 = 3.2167e-04
Loss = 4.4192e-01, PNorm = 60.4843, GNorm = 1.5595, lr_0 = 3.2145e-04
Loss = 3.5094e-01, PNorm = 60.4897, GNorm = 1.5832, lr_0 = 3.2123e-04
Loss = 3.9003e-01, PNorm = 60.4885, GNorm = 1.1241, lr_0 = 3.2101e-04
Loss = 3.2730e-01, PNorm = 60.4946, GNorm = 1.0249, lr_0 = 3.2079e-04
Loss = 3.1884e-01, PNorm = 60.4995, GNorm = 1.2495, lr_0 = 3.2057e-04
Loss = 3.6170e-01, PNorm = 60.5038, GNorm = 1.8910, lr_0 = 3.2035e-04
Loss = 4.2190e-01, PNorm = 60.5094, GNorm = 1.2224, lr_0 = 3.2013e-04
Loss = 3.9858e-01, PNorm = 60.5122, GNorm = 1.2052, lr_0 = 3.1991e-04
Loss = 4.0247e-01, PNorm = 60.5198, GNorm = 1.5559, lr_0 = 3.1969e-04
Loss = 3.8310e-01, PNorm = 60.5251, GNorm = 1.0169, lr_0 = 3.1947e-04
Loss = 3.6276e-01, PNorm = 60.5276, GNorm = 1.8399, lr_0 = 3.1925e-04
Loss = 4.1251e-01, PNorm = 60.5326, GNorm = 1.0382, lr_0 = 3.1904e-04
Loss = 3.9320e-01, PNorm = 60.5394, GNorm = 1.1881, lr_0 = 3.1882e-04
Loss = 3.1470e-01, PNorm = 60.5453, GNorm = 1.2317, lr_0 = 3.1860e-04
Loss = 4.2149e-01, PNorm = 60.5460, GNorm = 1.1282, lr_0 = 3.1838e-04
Loss = 3.7084e-01, PNorm = 60.5495, GNorm = 1.5081, lr_0 = 3.1816e-04
Loss = 3.1833e-01, PNorm = 60.5563, GNorm = 1.8454, lr_0 = 3.1794e-04
Loss = 4.0889e-01, PNorm = 60.5606, GNorm = 1.7369, lr_0 = 3.1773e-04
Loss = 3.0526e-01, PNorm = 60.5635, GNorm = 1.6003, lr_0 = 3.1751e-04
Loss = 3.9848e-01, PNorm = 60.5685, GNorm = 1.1827, lr_0 = 3.1729e-04
Loss = 3.9979e-01, PNorm = 60.5769, GNorm = 1.3058, lr_0 = 3.1707e-04
Loss = 3.7356e-01, PNorm = 60.5804, GNorm = 1.5940, lr_0 = 3.1686e-04
Loss = 3.4311e-01, PNorm = 60.5849, GNorm = 1.7107, lr_0 = 3.1664e-04
Loss = 3.5843e-01, PNorm = 60.5881, GNorm = 1.5244, lr_0 = 3.1642e-04
Loss = 3.6862e-01, PNorm = 60.5910, GNorm = 1.2565, lr_0 = 3.1621e-04
Validation mae = 0.112501
Epoch 16
Loss = 3.5623e-01, PNorm = 60.5942, GNorm = 1.6952, lr_0 = 3.1599e-04
Loss = 3.8866e-01, PNorm = 60.6013, GNorm = 2.0974, lr_0 = 3.1577e-04
Loss = 3.2541e-01, PNorm = 60.6030, GNorm = 1.3690, lr_0 = 3.1556e-04
Loss = 3.7585e-01, PNorm = 60.6040, GNorm = 1.4807, lr_0 = 3.1534e-04
Loss = 3.6723e-01, PNorm = 60.6060, GNorm = 1.4534, lr_0 = 3.1512e-04
Loss = 3.3126e-01, PNorm = 60.6097, GNorm = 1.0651, lr_0 = 3.1491e-04
Loss = 3.2601e-01, PNorm = 60.6090, GNorm = 1.2363, lr_0 = 3.1469e-04
Loss = 3.6355e-01, PNorm = 60.6137, GNorm = 1.4826, lr_0 = 3.1448e-04
Loss = 4.2348e-01, PNorm = 60.6190, GNorm = 1.0869, lr_0 = 3.1426e-04
Loss = 3.7555e-01, PNorm = 60.6239, GNorm = 0.9661, lr_0 = 3.1405e-04
Loss = 4.1621e-01, PNorm = 60.6255, GNorm = 1.6000, lr_0 = 3.1383e-04
Loss = 3.3201e-01, PNorm = 60.6273, GNorm = 1.0950, lr_0 = 3.1362e-04
Loss = 3.7419e-01, PNorm = 60.6323, GNorm = 1.2157, lr_0 = 3.1340e-04
Loss = 3.6151e-01, PNorm = 60.6364, GNorm = 1.0582, lr_0 = 3.1319e-04
Loss = 3.2584e-01, PNorm = 60.6425, GNorm = 1.0601, lr_0 = 3.1297e-04
Loss = 3.9523e-01, PNorm = 60.6513, GNorm = 1.4328, lr_0 = 3.1276e-04
Loss = 3.9320e-01, PNorm = 60.6594, GNorm = 1.3388, lr_0 = 3.1254e-04
Loss = 3.9198e-01, PNorm = 60.6674, GNorm = 1.1750, lr_0 = 3.1233e-04
Loss = 4.1634e-01, PNorm = 60.6708, GNorm = 1.0919, lr_0 = 3.1212e-04
Loss = 3.8846e-01, PNorm = 60.6787, GNorm = 1.2745, lr_0 = 3.1190e-04
Loss = 4.0063e-01, PNorm = 60.6827, GNorm = 2.1806, lr_0 = 3.1169e-04
Loss = 3.7160e-01, PNorm = 60.6863, GNorm = 1.3424, lr_0 = 3.1147e-04
Loss = 3.4106e-01, PNorm = 60.6928, GNorm = 1.4448, lr_0 = 3.1126e-04
Loss = 4.2811e-01, PNorm = 60.6953, GNorm = 1.2345, lr_0 = 3.1105e-04
Loss = 3.1544e-01, PNorm = 60.7027, GNorm = 1.0681, lr_0 = 3.1083e-04
Loss = 4.3136e-01, PNorm = 60.7105, GNorm = 1.0017, lr_0 = 3.1062e-04
Loss = 3.5068e-01, PNorm = 60.7161, GNorm = 1.0852, lr_0 = 3.1041e-04
Loss = 3.6762e-01, PNorm = 60.7195, GNorm = 1.6771, lr_0 = 3.1020e-04
Loss = 3.7588e-01, PNorm = 60.7251, GNorm = 1.3132, lr_0 = 3.0998e-04
Loss = 3.5701e-01, PNorm = 60.7291, GNorm = 1.8535, lr_0 = 3.0977e-04
Loss = 3.4312e-01, PNorm = 60.7287, GNorm = 0.9102, lr_0 = 3.0956e-04
Loss = 3.3596e-01, PNorm = 60.7282, GNorm = 1.9064, lr_0 = 3.0935e-04
Loss = 3.6522e-01, PNorm = 60.7307, GNorm = 1.0182, lr_0 = 3.0914e-04
Loss = 4.0130e-01, PNorm = 60.7328, GNorm = 1.3420, lr_0 = 3.0892e-04
Loss = 4.0912e-01, PNorm = 60.7368, GNorm = 1.4950, lr_0 = 3.0871e-04
Loss = 3.6857e-01, PNorm = 60.7415, GNorm = 1.0901, lr_0 = 3.0850e-04
Loss = 4.3432e-01, PNorm = 60.7484, GNorm = 1.5997, lr_0 = 3.0829e-04
Loss = 3.5366e-01, PNorm = 60.7529, GNorm = 1.1805, lr_0 = 3.0808e-04
Loss = 3.5709e-01, PNorm = 60.7577, GNorm = 1.5282, lr_0 = 3.0787e-04
Loss = 3.5796e-01, PNorm = 60.7574, GNorm = 1.4542, lr_0 = 3.0766e-04
Loss = 3.5878e-01, PNorm = 60.7645, GNorm = 1.1063, lr_0 = 3.0745e-04
Loss = 4.0579e-01, PNorm = 60.7655, GNorm = 1.3517, lr_0 = 3.0723e-04
Loss = 3.8305e-01, PNorm = 60.7646, GNorm = 1.3449, lr_0 = 3.0702e-04
Loss = 3.3329e-01, PNorm = 60.7656, GNorm = 1.2122, lr_0 = 3.0681e-04
Loss = 3.9053e-01, PNorm = 60.7710, GNorm = 1.0603, lr_0 = 3.0660e-04
Loss = 3.8617e-01, PNorm = 60.7794, GNorm = 1.3931, lr_0 = 3.0639e-04
Loss = 4.1075e-01, PNorm = 60.7812, GNorm = 1.6314, lr_0 = 3.0618e-04
Loss = 4.4008e-01, PNorm = 60.7843, GNorm = 0.7690, lr_0 = 3.0597e-04
Loss = 3.6003e-01, PNorm = 60.7888, GNorm = 1.1383, lr_0 = 3.0576e-04
Loss = 4.0883e-01, PNorm = 60.7920, GNorm = 2.0208, lr_0 = 3.0555e-04
Loss = 3.8205e-01, PNorm = 60.7945, GNorm = 1.4433, lr_0 = 3.0535e-04
Loss = 3.7817e-01, PNorm = 60.8014, GNorm = 0.9000, lr_0 = 3.0514e-04
Loss = 3.7859e-01, PNorm = 60.8065, GNorm = 1.9595, lr_0 = 3.0493e-04
Loss = 4.3126e-01, PNorm = 60.8109, GNorm = 1.1356, lr_0 = 3.0472e-04
Loss = 3.5030e-01, PNorm = 60.8142, GNorm = 1.0841, lr_0 = 3.0451e-04
Loss = 3.9034e-01, PNorm = 60.8160, GNorm = 2.2019, lr_0 = 3.0430e-04
Loss = 3.4284e-01, PNorm = 60.8181, GNorm = 0.9312, lr_0 = 3.0409e-04
Loss = 4.1802e-01, PNorm = 60.8193, GNorm = 1.5060, lr_0 = 3.0388e-04
Loss = 3.7038e-01, PNorm = 60.8254, GNorm = 1.2973, lr_0 = 3.0368e-04
Loss = 4.1685e-01, PNorm = 60.8238, GNorm = 1.2366, lr_0 = 3.0347e-04
Loss = 4.2794e-01, PNorm = 60.8280, GNorm = 1.0165, lr_0 = 3.0326e-04
Loss = 3.4682e-01, PNorm = 60.8345, GNorm = 1.1171, lr_0 = 3.0305e-04
Loss = 3.7074e-01, PNorm = 60.8396, GNorm = 1.3118, lr_0 = 3.0284e-04
Loss = 3.5157e-01, PNorm = 60.8455, GNorm = 1.3830, lr_0 = 3.0264e-04
Loss = 4.0513e-01, PNorm = 60.8517, GNorm = 1.4250, lr_0 = 3.0243e-04
Loss = 3.5258e-01, PNorm = 60.8532, GNorm = 1.1224, lr_0 = 3.0222e-04
Loss = 3.6740e-01, PNorm = 60.8589, GNorm = 1.1706, lr_0 = 3.0202e-04
Loss = 3.6113e-01, PNorm = 60.8626, GNorm = 1.2915, lr_0 = 3.0181e-04
Loss = 4.0712e-01, PNorm = 60.8620, GNorm = 2.3058, lr_0 = 3.0160e-04
Loss = 4.0145e-01, PNorm = 60.8654, GNorm = 1.4541, lr_0 = 3.0140e-04
Loss = 4.3025e-01, PNorm = 60.8676, GNorm = 1.4086, lr_0 = 3.0119e-04
Loss = 3.5036e-01, PNorm = 60.8753, GNorm = 1.2022, lr_0 = 3.0098e-04
Loss = 3.2250e-01, PNorm = 60.8821, GNorm = 1.2928, lr_0 = 3.0078e-04
Loss = 4.2643e-01, PNorm = 60.8879, GNorm = 1.3245, lr_0 = 3.0057e-04
Loss = 3.5954e-01, PNorm = 60.8929, GNorm = 1.4899, lr_0 = 3.0036e-04
Loss = 4.2643e-01, PNorm = 60.8955, GNorm = 1.9288, lr_0 = 3.0016e-04
Loss = 3.7635e-01, PNorm = 60.9009, GNorm = 1.9994, lr_0 = 2.9995e-04
Loss = 3.6309e-01, PNorm = 60.9053, GNorm = 1.4525, lr_0 = 2.9975e-04
Loss = 3.5029e-01, PNorm = 60.9071, GNorm = 1.6448, lr_0 = 2.9954e-04
Loss = 4.0591e-01, PNorm = 60.9082, GNorm = 1.2373, lr_0 = 2.9934e-04
Loss = 3.9878e-01, PNorm = 60.9124, GNorm = 1.7462, lr_0 = 2.9913e-04
Loss = 3.8547e-01, PNorm = 60.9154, GNorm = 1.2740, lr_0 = 2.9893e-04
Loss = 4.3587e-01, PNorm = 60.9185, GNorm = 1.4614, lr_0 = 2.9872e-04
Loss = 4.0288e-01, PNorm = 60.9239, GNorm = 1.4230, lr_0 = 2.9852e-04
Loss = 3.8775e-01, PNorm = 60.9298, GNorm = 1.5058, lr_0 = 2.9831e-04
Loss = 3.3982e-01, PNorm = 60.9333, GNorm = 1.2909, lr_0 = 2.9811e-04
Loss = 3.7241e-01, PNorm = 60.9348, GNorm = 1.3934, lr_0 = 2.9790e-04
Loss = 3.8183e-01, PNorm = 60.9405, GNorm = 1.1270, lr_0 = 2.9770e-04
Loss = 4.3046e-01, PNorm = 60.9385, GNorm = 1.2273, lr_0 = 2.9750e-04
Loss = 4.1036e-01, PNorm = 60.9470, GNorm = 1.6224, lr_0 = 2.9729e-04
Loss = 3.4101e-01, PNorm = 60.9549, GNorm = 1.0622, lr_0 = 2.9709e-04
Loss = 3.9773e-01, PNorm = 60.9601, GNorm = 1.3790, lr_0 = 2.9689e-04
Loss = 3.9142e-01, PNorm = 60.9614, GNorm = 1.4001, lr_0 = 2.9668e-04
Loss = 3.7364e-01, PNorm = 60.9640, GNorm = 1.1947, lr_0 = 2.9648e-04
Loss = 3.6495e-01, PNorm = 60.9693, GNorm = 1.7005, lr_0 = 2.9628e-04
Loss = 4.0102e-01, PNorm = 60.9718, GNorm = 1.7448, lr_0 = 2.9607e-04
Loss = 3.7769e-01, PNorm = 60.9741, GNorm = 1.2585, lr_0 = 2.9587e-04
Loss = 3.9513e-01, PNorm = 60.9766, GNorm = 3.1125, lr_0 = 2.9567e-04
Loss = 3.8633e-01, PNorm = 60.9794, GNorm = 1.6437, lr_0 = 2.9546e-04
Loss = 3.6644e-01, PNorm = 60.9880, GNorm = 1.6832, lr_0 = 2.9526e-04
Loss = 4.4103e-01, PNorm = 60.9952, GNorm = 1.3143, lr_0 = 2.9506e-04
Loss = 3.5978e-01, PNorm = 60.9993, GNorm = 1.2186, lr_0 = 2.9486e-04
Loss = 3.7679e-01, PNorm = 61.0015, GNorm = 1.4704, lr_0 = 2.9466e-04
Loss = 3.8531e-01, PNorm = 61.0051, GNorm = 2.1677, lr_0 = 2.9445e-04
Loss = 3.7059e-01, PNorm = 61.0082, GNorm = 1.8179, lr_0 = 2.9425e-04
Loss = 3.9862e-01, PNorm = 61.0145, GNorm = 1.3639, lr_0 = 2.9405e-04
Loss = 3.7565e-01, PNorm = 61.0211, GNorm = 1.5556, lr_0 = 2.9385e-04
Loss = 4.3018e-01, PNorm = 61.0259, GNorm = 2.6172, lr_0 = 2.9365e-04
Loss = 4.2770e-01, PNorm = 61.0277, GNorm = 1.5757, lr_0 = 2.9345e-04
Loss = 3.4977e-01, PNorm = 61.0268, GNorm = 1.5259, lr_0 = 2.9325e-04
Loss = 3.7366e-01, PNorm = 61.0274, GNorm = 1.3478, lr_0 = 2.9305e-04
Loss = 3.7447e-01, PNorm = 61.0316, GNorm = 1.2774, lr_0 = 2.9284e-04
Loss = 3.6273e-01, PNorm = 61.0372, GNorm = 1.1594, lr_0 = 2.9264e-04
Loss = 4.3494e-01, PNorm = 61.0439, GNorm = 1.9043, lr_0 = 2.9244e-04
Loss = 3.7661e-01, PNorm = 61.0512, GNorm = 1.1792, lr_0 = 2.9224e-04
Loss = 3.5634e-01, PNorm = 61.0510, GNorm = 1.7340, lr_0 = 2.9204e-04
Loss = 3.7726e-01, PNorm = 61.0569, GNorm = 1.8170, lr_0 = 2.9184e-04
Loss = 3.8358e-01, PNorm = 61.0603, GNorm = 1.5801, lr_0 = 2.9164e-04
Loss = 3.8515e-01, PNorm = 61.0647, GNorm = 1.0691, lr_0 = 2.9144e-04
Loss = 4.0728e-01, PNorm = 61.0668, GNorm = 1.2436, lr_0 = 2.9124e-04
Validation mae = 0.112276
Epoch 17
Loss = 3.5749e-01, PNorm = 61.0691, GNorm = 1.2744, lr_0 = 2.9104e-04
Loss = 3.3022e-01, PNorm = 61.0707, GNorm = 1.4094, lr_0 = 2.9084e-04
Loss = 3.4365e-01, PNorm = 61.0737, GNorm = 1.3773, lr_0 = 2.9065e-04
Loss = 3.8688e-01, PNorm = 61.0748, GNorm = 2.3661, lr_0 = 2.9045e-04
Loss = 3.6617e-01, PNorm = 61.0738, GNorm = 1.8705, lr_0 = 2.9025e-04
Loss = 4.3693e-01, PNorm = 61.0773, GNorm = 1.8333, lr_0 = 2.9005e-04
Loss = 3.4847e-01, PNorm = 61.0834, GNorm = 1.1911, lr_0 = 2.8985e-04
Loss = 3.5603e-01, PNorm = 61.0900, GNorm = 1.5020, lr_0 = 2.8965e-04
Loss = 3.2271e-01, PNorm = 61.0948, GNorm = 1.1153, lr_0 = 2.8945e-04
Loss = 3.9499e-01, PNorm = 61.0974, GNorm = 1.7727, lr_0 = 2.8925e-04
Loss = 3.3621e-01, PNorm = 61.0994, GNorm = 1.1597, lr_0 = 2.8906e-04
Loss = 3.8343e-01, PNorm = 61.1054, GNorm = 1.3177, lr_0 = 2.8886e-04
Loss = 3.6772e-01, PNorm = 61.1124, GNorm = 1.5942, lr_0 = 2.8866e-04
Loss = 3.6756e-01, PNorm = 61.1150, GNorm = 1.2263, lr_0 = 2.8846e-04
Loss = 3.5486e-01, PNorm = 61.1152, GNorm = 1.7879, lr_0 = 2.8826e-04
Loss = 3.5820e-01, PNorm = 61.1150, GNorm = 1.5339, lr_0 = 2.8807e-04
Loss = 3.4625e-01, PNorm = 61.1191, GNorm = 1.8508, lr_0 = 2.8787e-04
Loss = 3.7901e-01, PNorm = 61.1253, GNorm = 2.2921, lr_0 = 2.8767e-04
Loss = 3.6922e-01, PNorm = 61.1305, GNorm = 1.1786, lr_0 = 2.8748e-04
Loss = 4.3579e-01, PNorm = 61.1343, GNorm = 1.9321, lr_0 = 2.8728e-04
Loss = 3.6892e-01, PNorm = 61.1445, GNorm = 1.9357, lr_0 = 2.8708e-04
Loss = 3.8427e-01, PNorm = 61.1464, GNorm = 1.6457, lr_0 = 2.8689e-04
Loss = 3.3551e-01, PNorm = 61.1454, GNorm = 1.4650, lr_0 = 2.8669e-04
Loss = 3.5883e-01, PNorm = 61.1502, GNorm = 1.1989, lr_0 = 2.8649e-04
Loss = 3.5432e-01, PNorm = 61.1595, GNorm = 1.8611, lr_0 = 2.8630e-04
Loss = 4.2079e-01, PNorm = 61.1620, GNorm = 1.3344, lr_0 = 2.8610e-04
Loss = 3.6125e-01, PNorm = 61.1641, GNorm = 1.2043, lr_0 = 2.8590e-04
Loss = 3.9758e-01, PNorm = 61.1662, GNorm = 1.8436, lr_0 = 2.8571e-04
Loss = 3.5352e-01, PNorm = 61.1692, GNorm = 1.0194, lr_0 = 2.8551e-04
Loss = 4.0779e-01, PNorm = 61.1728, GNorm = 1.3007, lr_0 = 2.8532e-04
Loss = 3.9012e-01, PNorm = 61.1756, GNorm = 2.0631, lr_0 = 2.8512e-04
Loss = 4.0130e-01, PNorm = 61.1790, GNorm = 1.5588, lr_0 = 2.8493e-04
Loss = 3.8529e-01, PNorm = 61.1843, GNorm = 0.9950, lr_0 = 2.8473e-04
Loss = 3.7402e-01, PNorm = 61.1919, GNorm = 1.3296, lr_0 = 2.8454e-04
Loss = 3.7414e-01, PNorm = 61.1912, GNorm = 1.5614, lr_0 = 2.8434e-04
Loss = 3.4991e-01, PNorm = 61.1927, GNorm = 1.5973, lr_0 = 2.8415e-04
Loss = 3.2033e-01, PNorm = 61.1963, GNorm = 1.2014, lr_0 = 2.8395e-04
Loss = 3.7182e-01, PNorm = 61.2014, GNorm = 1.7616, lr_0 = 2.8376e-04
Loss = 3.5437e-01, PNorm = 61.2067, GNorm = 1.3679, lr_0 = 2.8356e-04
Loss = 3.6609e-01, PNorm = 61.2096, GNorm = 1.2518, lr_0 = 2.8337e-04
Loss = 4.0096e-01, PNorm = 61.2116, GNorm = 1.6261, lr_0 = 2.8317e-04
Loss = 3.8502e-01, PNorm = 61.2165, GNorm = 1.2683, lr_0 = 2.8298e-04
Loss = 4.1802e-01, PNorm = 61.2181, GNorm = 1.1620, lr_0 = 2.8279e-04
Loss = 3.9470e-01, PNorm = 61.2200, GNorm = 1.3937, lr_0 = 2.8259e-04
Loss = 4.4063e-01, PNorm = 61.2244, GNorm = 1.3274, lr_0 = 2.8240e-04
Loss = 3.5369e-01, PNorm = 61.2280, GNorm = 1.6684, lr_0 = 2.8221e-04
Loss = 3.7126e-01, PNorm = 61.2292, GNorm = 1.4786, lr_0 = 2.8201e-04
Loss = 3.8584e-01, PNorm = 61.2343, GNorm = 1.3130, lr_0 = 2.8182e-04
Loss = 3.7908e-01, PNorm = 61.2381, GNorm = 1.3866, lr_0 = 2.8163e-04
Loss = 3.7577e-01, PNorm = 61.2393, GNorm = 2.0285, lr_0 = 2.8143e-04
Loss = 3.6242e-01, PNorm = 61.2440, GNorm = 1.8680, lr_0 = 2.8124e-04
Loss = 3.6259e-01, PNorm = 61.2455, GNorm = 1.6744, lr_0 = 2.8105e-04
Loss = 3.4820e-01, PNorm = 61.2473, GNorm = 1.2044, lr_0 = 2.8085e-04
Loss = 3.3764e-01, PNorm = 61.2527, GNorm = 1.7314, lr_0 = 2.8066e-04
Loss = 3.4238e-01, PNorm = 61.2565, GNorm = 1.1537, lr_0 = 2.8047e-04
Loss = 3.4133e-01, PNorm = 61.2621, GNorm = 1.3242, lr_0 = 2.8028e-04
Loss = 3.4791e-01, PNorm = 61.2662, GNorm = 1.1165, lr_0 = 2.8009e-04
Loss = 3.4215e-01, PNorm = 61.2696, GNorm = 1.6505, lr_0 = 2.7989e-04
Loss = 3.9791e-01, PNorm = 61.2744, GNorm = 1.5507, lr_0 = 2.7970e-04
Loss = 3.5360e-01, PNorm = 61.2798, GNorm = 1.4817, lr_0 = 2.7951e-04
Loss = 3.7555e-01, PNorm = 61.2827, GNorm = 1.5462, lr_0 = 2.7932e-04
Loss = 3.6419e-01, PNorm = 61.2857, GNorm = 1.4836, lr_0 = 2.7913e-04
Loss = 4.0662e-01, PNorm = 61.2874, GNorm = 1.2808, lr_0 = 2.7894e-04
Loss = 3.6993e-01, PNorm = 61.2895, GNorm = 1.2249, lr_0 = 2.7875e-04
Loss = 4.4164e-01, PNorm = 61.2948, GNorm = 2.3139, lr_0 = 2.7855e-04
Loss = 3.7486e-01, PNorm = 61.2969, GNorm = 1.5283, lr_0 = 2.7836e-04
Loss = 3.5794e-01, PNorm = 61.2986, GNorm = 1.3158, lr_0 = 2.7817e-04
Loss = 3.9961e-01, PNorm = 61.3011, GNorm = 1.6514, lr_0 = 2.7798e-04
Loss = 3.4231e-01, PNorm = 61.3046, GNorm = 1.5941, lr_0 = 2.7779e-04
Loss = 3.7204e-01, PNorm = 61.3103, GNorm = 1.8454, lr_0 = 2.7760e-04
Loss = 3.9968e-01, PNorm = 61.3132, GNorm = 1.7105, lr_0 = 2.7741e-04
Loss = 3.5706e-01, PNorm = 61.3189, GNorm = 1.4884, lr_0 = 2.7722e-04
Loss = 4.6401e-01, PNorm = 61.3215, GNorm = 1.8881, lr_0 = 2.7703e-04
Loss = 3.7341e-01, PNorm = 61.3256, GNorm = 1.1680, lr_0 = 2.7684e-04
Loss = 3.6035e-01, PNorm = 61.3275, GNorm = 1.2135, lr_0 = 2.7665e-04
Loss = 3.6377e-01, PNorm = 61.3276, GNorm = 1.6432, lr_0 = 2.7646e-04
Loss = 3.5052e-01, PNorm = 61.3275, GNorm = 1.0745, lr_0 = 2.7627e-04
Loss = 3.8785e-01, PNorm = 61.3284, GNorm = 1.5997, lr_0 = 2.7608e-04
Loss = 4.1805e-01, PNorm = 61.3332, GNorm = 0.8679, lr_0 = 2.7590e-04
Loss = 3.9713e-01, PNorm = 61.3357, GNorm = 1.4485, lr_0 = 2.7571e-04
Loss = 4.1364e-01, PNorm = 61.3395, GNorm = 2.0303, lr_0 = 2.7552e-04
Loss = 3.5843e-01, PNorm = 61.3428, GNorm = 1.2704, lr_0 = 2.7533e-04
Loss = 3.5332e-01, PNorm = 61.3460, GNorm = 1.1802, lr_0 = 2.7514e-04
Loss = 4.1196e-01, PNorm = 61.3496, GNorm = 2.3721, lr_0 = 2.7495e-04
Loss = 3.6821e-01, PNorm = 61.3548, GNorm = 1.4193, lr_0 = 2.7476e-04
Loss = 3.6454e-01, PNorm = 61.3565, GNorm = 1.5017, lr_0 = 2.7457e-04
Loss = 4.1551e-01, PNorm = 61.3588, GNorm = 1.7368, lr_0 = 2.7439e-04
Loss = 3.8445e-01, PNorm = 61.3615, GNorm = 1.4185, lr_0 = 2.7420e-04
Loss = 3.4846e-01, PNorm = 61.3650, GNorm = 1.2503, lr_0 = 2.7401e-04
Loss = 4.0382e-01, PNorm = 61.3687, GNorm = 1.4271, lr_0 = 2.7382e-04
Loss = 3.5446e-01, PNorm = 61.3726, GNorm = 1.9607, lr_0 = 2.7364e-04
Loss = 3.3131e-01, PNorm = 61.3737, GNorm = 0.9122, lr_0 = 2.7345e-04
Loss = 3.8716e-01, PNorm = 61.3815, GNorm = 1.4260, lr_0 = 2.7326e-04
Loss = 3.7759e-01, PNorm = 61.3833, GNorm = 1.2655, lr_0 = 2.7307e-04
Loss = 3.6764e-01, PNorm = 61.3833, GNorm = 2.0380, lr_0 = 2.7289e-04
Loss = 4.3895e-01, PNorm = 61.3902, GNorm = 1.4743, lr_0 = 2.7270e-04
Loss = 3.5346e-01, PNorm = 61.3939, GNorm = 1.1770, lr_0 = 2.7251e-04
Loss = 3.7095e-01, PNorm = 61.3969, GNorm = 1.5496, lr_0 = 2.7233e-04
Loss = 3.6276e-01, PNorm = 61.3983, GNorm = 1.5755, lr_0 = 2.7214e-04
Loss = 3.7793e-01, PNorm = 61.4016, GNorm = 1.2877, lr_0 = 2.7195e-04
Loss = 3.7080e-01, PNorm = 61.4045, GNorm = 1.1464, lr_0 = 2.7177e-04
Loss = 3.7272e-01, PNorm = 61.4119, GNorm = 1.2925, lr_0 = 2.7158e-04
Loss = 3.7099e-01, PNorm = 61.4145, GNorm = 1.2864, lr_0 = 2.7139e-04
Loss = 3.7691e-01, PNorm = 61.4147, GNorm = 1.4753, lr_0 = 2.7121e-04
Loss = 4.0853e-01, PNorm = 61.4181, GNorm = 1.4494, lr_0 = 2.7102e-04
Loss = 3.6817e-01, PNorm = 61.4227, GNorm = 1.7873, lr_0 = 2.7084e-04
Loss = 3.5941e-01, PNorm = 61.4281, GNorm = 1.8517, lr_0 = 2.7065e-04
Loss = 4.0193e-01, PNorm = 61.4317, GNorm = 1.3113, lr_0 = 2.7047e-04
Loss = 3.2693e-01, PNorm = 61.4297, GNorm = 1.2180, lr_0 = 2.7028e-04
Loss = 4.2958e-01, PNorm = 61.4312, GNorm = 1.5094, lr_0 = 2.7010e-04
Loss = 3.4197e-01, PNorm = 61.4363, GNorm = 1.6769, lr_0 = 2.6991e-04
Loss = 3.6894e-01, PNorm = 61.4392, GNorm = 1.1071, lr_0 = 2.6973e-04
Loss = 3.6165e-01, PNorm = 61.4435, GNorm = 1.5942, lr_0 = 2.6954e-04
Loss = 3.9019e-01, PNorm = 61.4504, GNorm = 1.3951, lr_0 = 2.6936e-04
Loss = 3.3716e-01, PNorm = 61.4549, GNorm = 1.5530, lr_0 = 2.6917e-04
Loss = 3.7773e-01, PNorm = 61.4585, GNorm = 1.9980, lr_0 = 2.6899e-04
Loss = 3.9253e-01, PNorm = 61.4628, GNorm = 1.6782, lr_0 = 2.6880e-04
Loss = 3.7967e-01, PNorm = 61.4629, GNorm = 1.6655, lr_0 = 2.6862e-04
Loss = 3.7041e-01, PNorm = 61.4680, GNorm = 1.3095, lr_0 = 2.6844e-04
Loss = 4.0148e-01, PNorm = 61.4714, GNorm = 1.4951, lr_0 = 2.6825e-04
Validation mae = 0.112829
Epoch 18
Loss = 3.7365e-01, PNorm = 61.4752, GNorm = 1.8169, lr_0 = 2.6807e-04
Loss = 3.5329e-01, PNorm = 61.4772, GNorm = 1.7008, lr_0 = 2.6788e-04
Loss = 3.9316e-01, PNorm = 61.4851, GNorm = 1.6169, lr_0 = 2.6770e-04
Loss = 3.7715e-01, PNorm = 61.4868, GNorm = 1.9709, lr_0 = 2.6752e-04
Loss = 3.4912e-01, PNorm = 61.4893, GNorm = 1.2963, lr_0 = 2.6733e-04
Loss = 3.5152e-01, PNorm = 61.4928, GNorm = 1.5491, lr_0 = 2.6715e-04
Loss = 3.8084e-01, PNorm = 61.4981, GNorm = 1.6968, lr_0 = 2.6697e-04
Loss = 4.0676e-01, PNorm = 61.5024, GNorm = 1.9432, lr_0 = 2.6678e-04
Loss = 3.2551e-01, PNorm = 61.5044, GNorm = 1.3554, lr_0 = 2.6660e-04
Loss = 3.6589e-01, PNorm = 61.5099, GNorm = 1.6798, lr_0 = 2.6642e-04
Loss = 2.9841e-01, PNorm = 61.5122, GNorm = 1.2643, lr_0 = 2.6624e-04
Loss = 3.4389e-01, PNorm = 61.5138, GNorm = 1.4260, lr_0 = 2.6605e-04
Loss = 3.8877e-01, PNorm = 61.5147, GNorm = 1.2785, lr_0 = 2.6587e-04
Loss = 4.2469e-01, PNorm = 61.5190, GNorm = 1.7281, lr_0 = 2.6569e-04
Loss = 3.6731e-01, PNorm = 61.5233, GNorm = 1.3670, lr_0 = 2.6551e-04
Loss = 3.4733e-01, PNorm = 61.5262, GNorm = 1.2447, lr_0 = 2.6533e-04
Loss = 3.5649e-01, PNorm = 61.5284, GNorm = 1.0909, lr_0 = 2.6514e-04
Loss = 3.5833e-01, PNorm = 61.5326, GNorm = 1.1715, lr_0 = 2.6496e-04
Loss = 3.8294e-01, PNorm = 61.5366, GNorm = 1.6398, lr_0 = 2.6478e-04
Loss = 4.0312e-01, PNorm = 61.5432, GNorm = 2.1132, lr_0 = 2.6460e-04
Loss = 4.1842e-01, PNorm = 61.5471, GNorm = 1.6737, lr_0 = 2.6442e-04
Loss = 3.9425e-01, PNorm = 61.5496, GNorm = 1.3891, lr_0 = 2.6424e-04
Loss = 3.5892e-01, PNorm = 61.5531, GNorm = 1.3275, lr_0 = 2.6406e-04
Loss = 3.7077e-01, PNorm = 61.5544, GNorm = 0.9651, lr_0 = 2.6388e-04
Loss = 3.4742e-01, PNorm = 61.5569, GNorm = 1.1191, lr_0 = 2.6369e-04
Loss = 3.2938e-01, PNorm = 61.5582, GNorm = 0.9715, lr_0 = 2.6351e-04
Loss = 4.1155e-01, PNorm = 61.5633, GNorm = 1.2822, lr_0 = 2.6333e-04
Loss = 3.6752e-01, PNorm = 61.5673, GNorm = 1.2869, lr_0 = 2.6315e-04
Loss = 3.2998e-01, PNorm = 61.5683, GNorm = 1.7942, lr_0 = 2.6297e-04
Loss = 3.6747e-01, PNorm = 61.5715, GNorm = 1.1841, lr_0 = 2.6279e-04
Loss = 3.5899e-01, PNorm = 61.5745, GNorm = 1.4005, lr_0 = 2.6261e-04
Loss = 4.0518e-01, PNorm = 61.5774, GNorm = 1.6074, lr_0 = 2.6243e-04
Loss = 4.2441e-01, PNorm = 61.5784, GNorm = 1.6050, lr_0 = 2.6225e-04
Loss = 3.6289e-01, PNorm = 61.5797, GNorm = 0.8954, lr_0 = 2.6207e-04
Loss = 3.5927e-01, PNorm = 61.5838, GNorm = 1.0017, lr_0 = 2.6189e-04
Loss = 3.8441e-01, PNorm = 61.5872, GNorm = 1.6589, lr_0 = 2.6171e-04
Loss = 3.5831e-01, PNorm = 61.5938, GNorm = 1.3587, lr_0 = 2.6153e-04
Loss = 4.0634e-01, PNorm = 61.5966, GNorm = 1.7402, lr_0 = 2.6136e-04
Loss = 3.3563e-01, PNorm = 61.5973, GNorm = 1.8506, lr_0 = 2.6118e-04
Loss = 3.5552e-01, PNorm = 61.5998, GNorm = 1.0065, lr_0 = 2.6100e-04
Loss = 3.8544e-01, PNorm = 61.6039, GNorm = 1.2546, lr_0 = 2.6082e-04
Loss = 3.9931e-01, PNorm = 61.6086, GNorm = 1.2181, lr_0 = 2.6064e-04
Loss = 3.5208e-01, PNorm = 61.6125, GNorm = 1.2206, lr_0 = 2.6046e-04
Loss = 3.3395e-01, PNorm = 61.6178, GNorm = 1.2979, lr_0 = 2.6028e-04
Loss = 3.8056e-01, PNorm = 61.6224, GNorm = 1.4798, lr_0 = 2.6011e-04
Loss = 3.6525e-01, PNorm = 61.6267, GNorm = 1.7601, lr_0 = 2.5993e-04
Loss = 3.8039e-01, PNorm = 61.6297, GNorm = 1.1883, lr_0 = 2.5975e-04
Loss = 3.3966e-01, PNorm = 61.6345, GNorm = 1.7037, lr_0 = 2.5957e-04
Loss = 3.4477e-01, PNorm = 61.6383, GNorm = 1.5354, lr_0 = 2.5939e-04
Loss = 3.8632e-01, PNorm = 61.6406, GNorm = 1.2166, lr_0 = 2.5922e-04
Loss = 3.4522e-01, PNorm = 61.6452, GNorm = 1.3148, lr_0 = 2.5904e-04
Loss = 3.5918e-01, PNorm = 61.6489, GNorm = 1.1718, lr_0 = 2.5886e-04
Loss = 3.8628e-01, PNorm = 61.6528, GNorm = 1.5852, lr_0 = 2.5868e-04
Loss = 4.0850e-01, PNorm = 61.6556, GNorm = 1.6208, lr_0 = 2.5851e-04
Loss = 3.9123e-01, PNorm = 61.6609, GNorm = 1.7101, lr_0 = 2.5833e-04
Loss = 3.6945e-01, PNorm = 61.6609, GNorm = 1.1585, lr_0 = 2.5815e-04
Loss = 3.7969e-01, PNorm = 61.6623, GNorm = 1.2862, lr_0 = 2.5797e-04
Loss = 3.7282e-01, PNorm = 61.6686, GNorm = 0.9751, lr_0 = 2.5780e-04
Loss = 3.8594e-01, PNorm = 61.6728, GNorm = 1.6135, lr_0 = 2.5762e-04
Loss = 3.8984e-01, PNorm = 61.6783, GNorm = 1.1958, lr_0 = 2.5745e-04
Loss = 3.5884e-01, PNorm = 61.6808, GNorm = 1.2216, lr_0 = 2.5727e-04
Loss = 3.3226e-01, PNorm = 61.6821, GNorm = 1.0939, lr_0 = 2.5709e-04
Loss = 3.5361e-01, PNorm = 61.6860, GNorm = 1.1733, lr_0 = 2.5692e-04
Loss = 3.8473e-01, PNorm = 61.6912, GNorm = 1.3602, lr_0 = 2.5674e-04
Loss = 3.7892e-01, PNorm = 61.6955, GNorm = 1.5323, lr_0 = 2.5656e-04
Loss = 3.8423e-01, PNorm = 61.6994, GNorm = 1.4362, lr_0 = 2.5639e-04
Loss = 3.5309e-01, PNorm = 61.6993, GNorm = 1.8563, lr_0 = 2.5621e-04
Loss = 4.1845e-01, PNorm = 61.7006, GNorm = 1.1341, lr_0 = 2.5604e-04
Loss = 3.9358e-01, PNorm = 61.7030, GNorm = 1.5856, lr_0 = 2.5586e-04
Loss = 3.5891e-01, PNorm = 61.7078, GNorm = 1.3029, lr_0 = 2.5569e-04
Loss = 3.6816e-01, PNorm = 61.7113, GNorm = 1.3622, lr_0 = 2.5551e-04
Loss = 3.9006e-01, PNorm = 61.7154, GNorm = 1.3277, lr_0 = 2.5534e-04
Loss = 3.8803e-01, PNorm = 61.7189, GNorm = 1.6606, lr_0 = 2.5516e-04
Loss = 3.4499e-01, PNorm = 61.7198, GNorm = 1.3469, lr_0 = 2.5499e-04
Loss = 3.4826e-01, PNorm = 61.7215, GNorm = 1.2995, lr_0 = 2.5481e-04
Loss = 3.2367e-01, PNorm = 61.7239, GNorm = 1.4882, lr_0 = 2.5464e-04
Loss = 3.9092e-01, PNorm = 61.7243, GNorm = 0.9590, lr_0 = 2.5446e-04
Loss = 3.2753e-01, PNorm = 61.7247, GNorm = 1.4070, lr_0 = 2.5429e-04
Loss = 4.2859e-01, PNorm = 61.7290, GNorm = 1.4985, lr_0 = 2.5411e-04
Loss = 3.8634e-01, PNorm = 61.7330, GNorm = 1.1056, lr_0 = 2.5394e-04
Loss = 4.2797e-01, PNorm = 61.7367, GNorm = 2.9777, lr_0 = 2.5377e-04
Loss = 3.2245e-01, PNorm = 61.7402, GNorm = 1.5013, lr_0 = 2.5359e-04
Loss = 3.9367e-01, PNorm = 61.7403, GNorm = 1.4766, lr_0 = 2.5342e-04
Loss = 3.5847e-01, PNorm = 61.7432, GNorm = 1.5514, lr_0 = 2.5325e-04
Loss = 3.9628e-01, PNorm = 61.7461, GNorm = 2.7376, lr_0 = 2.5307e-04
Loss = 3.4772e-01, PNorm = 61.7504, GNorm = 1.0123, lr_0 = 2.5290e-04
Loss = 3.5236e-01, PNorm = 61.7498, GNorm = 1.4443, lr_0 = 2.5273e-04
Loss = 3.8606e-01, PNorm = 61.7491, GNorm = 1.1881, lr_0 = 2.5255e-04
Loss = 3.4929e-01, PNorm = 61.7487, GNorm = 0.9878, lr_0 = 2.5238e-04
Loss = 3.6239e-01, PNorm = 61.7511, GNorm = 1.2082, lr_0 = 2.5221e-04
Loss = 3.3227e-01, PNorm = 61.7539, GNorm = 1.3230, lr_0 = 2.5203e-04
Loss = 2.8899e-01, PNorm = 61.7556, GNorm = 1.0500, lr_0 = 2.5186e-04
Loss = 3.6605e-01, PNorm = 61.7555, GNorm = 1.5227, lr_0 = 2.5169e-04
Loss = 3.6447e-01, PNorm = 61.7562, GNorm = 1.3040, lr_0 = 2.5152e-04
Loss = 3.8745e-01, PNorm = 61.7592, GNorm = 1.4028, lr_0 = 2.5134e-04
Loss = 3.5493e-01, PNorm = 61.7612, GNorm = 1.2239, lr_0 = 2.5117e-04
Loss = 3.8387e-01, PNorm = 61.7626, GNorm = 1.5162, lr_0 = 2.5100e-04
Loss = 3.4956e-01, PNorm = 61.7662, GNorm = 1.6188, lr_0 = 2.5083e-04
Loss = 3.4283e-01, PNorm = 61.7672, GNorm = 1.2854, lr_0 = 2.5066e-04
Loss = 3.9262e-01, PNorm = 61.7708, GNorm = 1.1996, lr_0 = 2.5048e-04
Loss = 3.7968e-01, PNorm = 61.7748, GNorm = 1.8505, lr_0 = 2.5031e-04
Loss = 3.4344e-01, PNorm = 61.7765, GNorm = 1.4666, lr_0 = 2.5014e-04
Loss = 4.0056e-01, PNorm = 61.7832, GNorm = 1.4831, lr_0 = 2.4997e-04
Loss = 4.5407e-01, PNorm = 61.7857, GNorm = 1.4735, lr_0 = 2.4980e-04
Loss = 3.3968e-01, PNorm = 61.7904, GNorm = 1.0531, lr_0 = 2.4963e-04
Loss = 3.8945e-01, PNorm = 61.7915, GNorm = 1.5669, lr_0 = 2.4946e-04
Loss = 3.4375e-01, PNorm = 61.7965, GNorm = 1.3762, lr_0 = 2.4929e-04
Loss = 3.6043e-01, PNorm = 61.7986, GNorm = 1.3536, lr_0 = 2.4911e-04
Loss = 3.3968e-01, PNorm = 61.8054, GNorm = 1.5758, lr_0 = 2.4894e-04
Loss = 3.8097e-01, PNorm = 61.8089, GNorm = 1.4178, lr_0 = 2.4877e-04
Loss = 3.9172e-01, PNorm = 61.8087, GNorm = 1.4867, lr_0 = 2.4860e-04
Loss = 3.5823e-01, PNorm = 61.8106, GNorm = 1.5189, lr_0 = 2.4843e-04
Loss = 3.8101e-01, PNorm = 61.8136, GNorm = 1.1360, lr_0 = 2.4826e-04
Loss = 3.3129e-01, PNorm = 61.8158, GNorm = 1.4898, lr_0 = 2.4809e-04
Loss = 4.1608e-01, PNorm = 61.8191, GNorm = 1.1999, lr_0 = 2.4792e-04
Loss = 3.8913e-01, PNorm = 61.8232, GNorm = 1.1682, lr_0 = 2.4775e-04
Loss = 3.8434e-01, PNorm = 61.8258, GNorm = 1.7163, lr_0 = 2.4758e-04
Loss = 3.9692e-01, PNorm = 61.8289, GNorm = 1.4941, lr_0 = 2.4741e-04
Loss = 3.4029e-01, PNorm = 61.8306, GNorm = 1.4445, lr_0 = 2.4724e-04
Loss = 4.6413e-01, PNorm = 61.8300, GNorm = 1.1886, lr_0 = 2.4707e-04
Validation mae = 0.112745
Epoch 19
Loss = 3.7008e-01, PNorm = 61.8328, GNorm = 1.2829, lr_0 = 2.4690e-04
Loss = 3.6793e-01, PNorm = 61.8385, GNorm = 1.6529, lr_0 = 2.4674e-04
Loss = 3.3200e-01, PNorm = 61.8430, GNorm = 1.5122, lr_0 = 2.4657e-04
Loss = 4.0129e-01, PNorm = 61.8497, GNorm = 2.2147, lr_0 = 2.4640e-04
Loss = 3.6513e-01, PNorm = 61.8517, GNorm = 1.0997, lr_0 = 2.4623e-04
Loss = 3.5072e-01, PNorm = 61.8581, GNorm = 1.3760, lr_0 = 2.4606e-04
Loss = 3.7485e-01, PNorm = 61.8617, GNorm = 1.4897, lr_0 = 2.4589e-04
Loss = 3.3135e-01, PNorm = 61.8640, GNorm = 1.0477, lr_0 = 2.4572e-04
Loss = 3.8546e-01, PNorm = 61.8664, GNorm = 1.7446, lr_0 = 2.4556e-04
Loss = 3.7498e-01, PNorm = 61.8656, GNorm = 1.1024, lr_0 = 2.4539e-04
Loss = 3.3343e-01, PNorm = 61.8661, GNorm = 2.0671, lr_0 = 2.4522e-04
Loss = 3.3913e-01, PNorm = 61.8692, GNorm = 1.0474, lr_0 = 2.4505e-04
Loss = 3.6270e-01, PNorm = 61.8737, GNorm = 1.3603, lr_0 = 2.4488e-04
Loss = 3.1892e-01, PNorm = 61.8792, GNorm = 1.6470, lr_0 = 2.4472e-04
Loss = 3.3470e-01, PNorm = 61.8837, GNorm = 1.1520, lr_0 = 2.4455e-04
Loss = 3.8355e-01, PNorm = 61.8876, GNorm = 2.9095, lr_0 = 2.4438e-04
Loss = 3.5300e-01, PNorm = 61.8900, GNorm = 1.1997, lr_0 = 2.4421e-04
Loss = 3.6558e-01, PNorm = 61.8939, GNorm = 1.6369, lr_0 = 2.4405e-04
Loss = 4.0223e-01, PNorm = 61.8953, GNorm = 1.6483, lr_0 = 2.4388e-04
Loss = 4.5090e-01, PNorm = 61.8991, GNorm = 1.1534, lr_0 = 2.4371e-04
Loss = 3.6699e-01, PNorm = 61.9013, GNorm = 1.4136, lr_0 = 2.4354e-04
Loss = 3.9421e-01, PNorm = 61.9014, GNorm = 1.8909, lr_0 = 2.4338e-04
Loss = 3.7015e-01, PNorm = 61.9064, GNorm = 1.0811, lr_0 = 2.4321e-04
Loss = 3.6722e-01, PNorm = 61.9096, GNorm = 1.4675, lr_0 = 2.4304e-04
Loss = 3.5492e-01, PNorm = 61.9136, GNorm = 1.1570, lr_0 = 2.4288e-04
Loss = 4.0079e-01, PNorm = 61.9197, GNorm = 1.3629, lr_0 = 2.4271e-04
Loss = 3.5845e-01, PNorm = 61.9210, GNorm = 1.3299, lr_0 = 2.4254e-04
Loss = 3.7906e-01, PNorm = 61.9206, GNorm = 1.2708, lr_0 = 2.4238e-04
Loss = 3.6043e-01, PNorm = 61.9243, GNorm = 1.6500, lr_0 = 2.4221e-04
Loss = 4.1756e-01, PNorm = 61.9235, GNorm = 2.0490, lr_0 = 2.4205e-04
Loss = 3.6177e-01, PNorm = 61.9268, GNorm = 1.1894, lr_0 = 2.4188e-04
Loss = 3.3687e-01, PNorm = 61.9272, GNorm = 1.1156, lr_0 = 2.4171e-04
Loss = 3.7438e-01, PNorm = 61.9312, GNorm = 2.0614, lr_0 = 2.4155e-04
Loss = 3.4241e-01, PNorm = 61.9323, GNorm = 1.3720, lr_0 = 2.4138e-04
Loss = 3.1111e-01, PNorm = 61.9374, GNorm = 1.8793, lr_0 = 2.4122e-04
Loss = 3.9260e-01, PNorm = 61.9415, GNorm = 1.4379, lr_0 = 2.4105e-04
Loss = 3.5017e-01, PNorm = 61.9441, GNorm = 1.7036, lr_0 = 2.4089e-04
Loss = 3.7995e-01, PNorm = 61.9502, GNorm = 2.2274, lr_0 = 2.4072e-04
Loss = 3.7114e-01, PNorm = 61.9543, GNorm = 1.2397, lr_0 = 2.4056e-04
Loss = 3.9916e-01, PNorm = 61.9573, GNorm = 1.7640, lr_0 = 2.4039e-04
Loss = 3.4635e-01, PNorm = 61.9610, GNorm = 1.3606, lr_0 = 2.4023e-04
Loss = 3.7714e-01, PNorm = 61.9638, GNorm = 2.0763, lr_0 = 2.4006e-04
Loss = 3.2019e-01, PNorm = 61.9646, GNorm = 1.1597, lr_0 = 2.3990e-04
Loss = 3.2077e-01, PNorm = 61.9656, GNorm = 1.6308, lr_0 = 2.3974e-04
Loss = 3.5200e-01, PNorm = 61.9698, GNorm = 1.5033, lr_0 = 2.3957e-04
Loss = 4.1242e-01, PNorm = 61.9718, GNorm = 1.4679, lr_0 = 2.3941e-04
Loss = 3.4052e-01, PNorm = 61.9744, GNorm = 2.2996, lr_0 = 2.3924e-04
Loss = 3.5941e-01, PNorm = 61.9789, GNorm = 1.4538, lr_0 = 2.3908e-04
Loss = 3.6326e-01, PNorm = 61.9814, GNorm = 2.1237, lr_0 = 2.3892e-04
Loss = 3.6164e-01, PNorm = 61.9822, GNorm = 1.3628, lr_0 = 2.3875e-04
Loss = 3.5409e-01, PNorm = 61.9820, GNorm = 1.7543, lr_0 = 2.3859e-04
Loss = 3.4356e-01, PNorm = 61.9827, GNorm = 1.2578, lr_0 = 2.3842e-04
Loss = 3.1853e-01, PNorm = 61.9853, GNorm = 1.1465, lr_0 = 2.3826e-04
Loss = 3.6435e-01, PNorm = 61.9861, GNorm = 1.2387, lr_0 = 2.3810e-04
Loss = 3.5642e-01, PNorm = 61.9886, GNorm = 1.0112, lr_0 = 2.3794e-04
Loss = 4.1672e-01, PNorm = 61.9916, GNorm = 1.6973, lr_0 = 2.3777e-04
Loss = 4.1081e-01, PNorm = 61.9934, GNorm = 1.4455, lr_0 = 2.3761e-04
Loss = 3.8033e-01, PNorm = 61.9958, GNorm = 2.0724, lr_0 = 2.3745e-04
Loss = 3.6406e-01, PNorm = 61.9978, GNorm = 2.1123, lr_0 = 2.3728e-04
Loss = 3.1910e-01, PNorm = 61.9991, GNorm = 1.2246, lr_0 = 2.3712e-04
Loss = 3.9132e-01, PNorm = 61.9990, GNorm = 1.3104, lr_0 = 2.3696e-04
Loss = 3.7996e-01, PNorm = 62.0023, GNorm = 1.3800, lr_0 = 2.3680e-04
Loss = 3.6097e-01, PNorm = 62.0050, GNorm = 0.9797, lr_0 = 2.3663e-04
Loss = 3.6558e-01, PNorm = 62.0052, GNorm = 1.1858, lr_0 = 2.3647e-04
Loss = 3.6172e-01, PNorm = 62.0034, GNorm = 1.2620, lr_0 = 2.3631e-04
Loss = 3.7167e-01, PNorm = 62.0054, GNorm = 1.9915, lr_0 = 2.3615e-04
Loss = 3.7476e-01, PNorm = 62.0115, GNorm = 1.1909, lr_0 = 2.3599e-04
Loss = 3.6678e-01, PNorm = 62.0148, GNorm = 1.1579, lr_0 = 2.3582e-04
Loss = 4.3260e-01, PNorm = 62.0131, GNorm = 1.5658, lr_0 = 2.3566e-04
Loss = 3.8484e-01, PNorm = 62.0141, GNorm = 1.2698, lr_0 = 2.3550e-04
Loss = 4.0767e-01, PNorm = 62.0187, GNorm = 1.2559, lr_0 = 2.3534e-04
Loss = 4.2957e-01, PNorm = 62.0216, GNorm = 1.2224, lr_0 = 2.3518e-04
Loss = 3.6433e-01, PNorm = 62.0250, GNorm = 0.9523, lr_0 = 2.3502e-04
Loss = 3.5828e-01, PNorm = 62.0299, GNorm = 1.3873, lr_0 = 2.3486e-04
Loss = 4.1657e-01, PNorm = 62.0333, GNorm = 1.6823, lr_0 = 2.3470e-04
Loss = 3.5978e-01, PNorm = 62.0353, GNorm = 1.4526, lr_0 = 2.3454e-04
Loss = 3.8479e-01, PNorm = 62.0384, GNorm = 1.2707, lr_0 = 2.3437e-04
Loss = 3.7638e-01, PNorm = 62.0407, GNorm = 1.3474, lr_0 = 2.3421e-04
Loss = 4.2245e-01, PNorm = 62.0442, GNorm = 1.4639, lr_0 = 2.3405e-04
Loss = 3.7211e-01, PNorm = 62.0470, GNorm = 1.5686, lr_0 = 2.3389e-04
Loss = 3.6115e-01, PNorm = 62.0479, GNorm = 1.3850, lr_0 = 2.3373e-04
Loss = 3.6809e-01, PNorm = 62.0500, GNorm = 1.3576, lr_0 = 2.3357e-04
Loss = 3.5682e-01, PNorm = 62.0528, GNorm = 1.3469, lr_0 = 2.3341e-04
Loss = 3.4893e-01, PNorm = 62.0551, GNorm = 1.4168, lr_0 = 2.3325e-04
Loss = 3.1047e-01, PNorm = 62.0571, GNorm = 1.1151, lr_0 = 2.3309e-04
Loss = 3.4742e-01, PNorm = 62.0557, GNorm = 1.3867, lr_0 = 2.3293e-04
Loss = 3.3392e-01, PNorm = 62.0549, GNorm = 1.3342, lr_0 = 2.3277e-04
Loss = 3.4701e-01, PNorm = 62.0606, GNorm = 1.0835, lr_0 = 2.3261e-04
Loss = 3.4160e-01, PNorm = 62.0661, GNorm = 1.2881, lr_0 = 2.3246e-04
Loss = 3.1559e-01, PNorm = 62.0681, GNorm = 1.2823, lr_0 = 2.3230e-04
Loss = 3.6774e-01, PNorm = 62.0685, GNorm = 1.3586, lr_0 = 2.3214e-04
Loss = 4.1739e-01, PNorm = 62.0701, GNorm = 1.6748, lr_0 = 2.3198e-04
Loss = 3.7919e-01, PNorm = 62.0729, GNorm = 1.9833, lr_0 = 2.3182e-04
Loss = 3.7265e-01, PNorm = 62.0793, GNorm = 1.0954, lr_0 = 2.3166e-04
Loss = 3.1953e-01, PNorm = 62.0838, GNorm = 1.1787, lr_0 = 2.3150e-04
Loss = 4.0913e-01, PNorm = 62.0876, GNorm = 1.0923, lr_0 = 2.3134e-04
Loss = 3.7774e-01, PNorm = 62.0902, GNorm = 1.1847, lr_0 = 2.3118e-04
Loss = 3.4652e-01, PNorm = 62.0929, GNorm = 1.1668, lr_0 = 2.3103e-04
Loss = 3.7201e-01, PNorm = 62.0968, GNorm = 2.2697, lr_0 = 2.3087e-04
Loss = 3.8274e-01, PNorm = 62.0989, GNorm = 1.1514, lr_0 = 2.3071e-04
Loss = 4.2246e-01, PNorm = 62.0981, GNorm = 1.7728, lr_0 = 2.3055e-04
Loss = 3.9256e-01, PNorm = 62.1010, GNorm = 2.0416, lr_0 = 2.3039e-04
Loss = 3.4531e-01, PNorm = 62.1053, GNorm = 1.3805, lr_0 = 2.3024e-04
Loss = 3.2694e-01, PNorm = 62.1083, GNorm = 1.3335, lr_0 = 2.3008e-04
Loss = 3.5990e-01, PNorm = 62.1135, GNorm = 1.4955, lr_0 = 2.2992e-04
Loss = 3.0650e-01, PNorm = 62.1143, GNorm = 1.3101, lr_0 = 2.2976e-04
Loss = 3.6356e-01, PNorm = 62.1131, GNorm = 1.9288, lr_0 = 2.2961e-04
Loss = 4.2268e-01, PNorm = 62.1147, GNorm = 1.5770, lr_0 = 2.2945e-04
Loss = 3.3811e-01, PNorm = 62.1209, GNorm = 1.6604, lr_0 = 2.2929e-04
Loss = 3.7136e-01, PNorm = 62.1248, GNorm = 1.9002, lr_0 = 2.2913e-04
Loss = 3.7875e-01, PNorm = 62.1284, GNorm = 1.0514, lr_0 = 2.2898e-04
Loss = 3.5523e-01, PNorm = 62.1303, GNorm = 1.0746, lr_0 = 2.2882e-04
Loss = 3.6079e-01, PNorm = 62.1331, GNorm = 1.4609, lr_0 = 2.2866e-04
Loss = 3.6070e-01, PNorm = 62.1320, GNorm = 1.3195, lr_0 = 2.2851e-04
Loss = 3.7188e-01, PNorm = 62.1328, GNorm = 1.1460, lr_0 = 2.2835e-04
Loss = 3.9520e-01, PNorm = 62.1362, GNorm = 1.6506, lr_0 = 2.2819e-04
Loss = 4.3156e-01, PNorm = 62.1386, GNorm = 1.3859, lr_0 = 2.2804e-04
Loss = 3.4234e-01, PNorm = 62.1399, GNorm = 0.8313, lr_0 = 2.2788e-04
Loss = 3.4932e-01, PNorm = 62.1428, GNorm = 2.2388, lr_0 = 2.2773e-04
Loss = 3.9421e-01, PNorm = 62.1475, GNorm = 1.5556, lr_0 = 2.2757e-04
Validation mae = 0.112696
Epoch 20
Loss = 3.6456e-01, PNorm = 62.1509, GNorm = 1.3622, lr_0 = 2.2741e-04
Loss = 4.1211e-01, PNorm = 62.1560, GNorm = 1.9893, lr_0 = 2.2726e-04
Loss = 3.9854e-01, PNorm = 62.1553, GNorm = 1.2607, lr_0 = 2.2710e-04
Loss = 3.4684e-01, PNorm = 62.1570, GNorm = 1.3233, lr_0 = 2.2695e-04
Loss = 3.0730e-01, PNorm = 62.1601, GNorm = 1.4847, lr_0 = 2.2679e-04
Loss = 3.8847e-01, PNorm = 62.1605, GNorm = 1.1196, lr_0 = 2.2664e-04
Loss = 3.5165e-01, PNorm = 62.1665, GNorm = 1.3162, lr_0 = 2.2648e-04
Loss = 3.9936e-01, PNorm = 62.1727, GNorm = 1.1239, lr_0 = 2.2632e-04
Loss = 3.4262e-01, PNorm = 62.1752, GNorm = 1.3580, lr_0 = 2.2617e-04
Loss = 3.7987e-01, PNorm = 62.1776, GNorm = 1.4099, lr_0 = 2.2601e-04
Loss = 4.1836e-01, PNorm = 62.1770, GNorm = 1.0837, lr_0 = 2.2586e-04
Loss = 3.8526e-01, PNorm = 62.1767, GNorm = 1.4362, lr_0 = 2.2571e-04
Loss = 3.7214e-01, PNorm = 62.1818, GNorm = 1.6276, lr_0 = 2.2555e-04
Loss = 3.9719e-01, PNorm = 62.1835, GNorm = 1.7809, lr_0 = 2.2540e-04
Loss = 3.3749e-01, PNorm = 62.1881, GNorm = 1.3438, lr_0 = 2.2524e-04
Loss = 3.1581e-01, PNorm = 62.1908, GNorm = 1.8532, lr_0 = 2.2509e-04
Loss = 3.3210e-01, PNorm = 62.1933, GNorm = 1.4291, lr_0 = 2.2493e-04
Loss = 3.3488e-01, PNorm = 62.1961, GNorm = 0.9209, lr_0 = 2.2478e-04
Loss = 3.1545e-01, PNorm = 62.1992, GNorm = 1.2127, lr_0 = 2.2463e-04
Loss = 3.7512e-01, PNorm = 62.1996, GNorm = 1.6282, lr_0 = 2.2447e-04
Loss = 4.4507e-01, PNorm = 62.1998, GNorm = 1.2716, lr_0 = 2.2432e-04
Loss = 3.6291e-01, PNorm = 62.2018, GNorm = 1.0285, lr_0 = 2.2416e-04
Loss = 3.3764e-01, PNorm = 62.2057, GNorm = 1.0413, lr_0 = 2.2401e-04
Loss = 3.6256e-01, PNorm = 62.2077, GNorm = 1.2054, lr_0 = 2.2386e-04
Loss = 3.5502e-01, PNorm = 62.2100, GNorm = 2.0727, lr_0 = 2.2370e-04
Loss = 3.4386e-01, PNorm = 62.2110, GNorm = 1.0519, lr_0 = 2.2355e-04
Loss = 3.9332e-01, PNorm = 62.2135, GNorm = 2.1155, lr_0 = 2.2340e-04
Loss = 4.3832e-01, PNorm = 62.2171, GNorm = 1.2346, lr_0 = 2.2324e-04
Loss = 3.8623e-01, PNorm = 62.2224, GNorm = 2.4973, lr_0 = 2.2309e-04
Loss = 3.1276e-01, PNorm = 62.2258, GNorm = 1.2503, lr_0 = 2.2294e-04
Loss = 3.2415e-01, PNorm = 62.2275, GNorm = 1.7362, lr_0 = 2.2279e-04
Loss = 3.4749e-01, PNorm = 62.2299, GNorm = 1.1689, lr_0 = 2.2263e-04
Loss = 4.0046e-01, PNorm = 62.2307, GNorm = 1.6443, lr_0 = 2.2248e-04
Loss = 3.9503e-01, PNorm = 62.2325, GNorm = 1.1790, lr_0 = 2.2233e-04
Loss = 3.6901e-01, PNorm = 62.2330, GNorm = 1.3561, lr_0 = 2.2218e-04
Loss = 3.4800e-01, PNorm = 62.2321, GNorm = 1.1630, lr_0 = 2.2202e-04
Loss = 3.3441e-01, PNorm = 62.2312, GNorm = 1.2898, lr_0 = 2.2187e-04
Loss = 4.1118e-01, PNorm = 62.2312, GNorm = 1.9903, lr_0 = 2.2172e-04
Loss = 4.1215e-01, PNorm = 62.2375, GNorm = 1.5593, lr_0 = 2.2157e-04
Loss = 3.3344e-01, PNorm = 62.2381, GNorm = 1.3774, lr_0 = 2.2142e-04
Loss = 3.7907e-01, PNorm = 62.2395, GNorm = 1.9562, lr_0 = 2.2126e-04
Loss = 3.6768e-01, PNorm = 62.2418, GNorm = 1.4276, lr_0 = 2.2111e-04
Loss = 3.5468e-01, PNorm = 62.2479, GNorm = 1.5520, lr_0 = 2.2096e-04
Loss = 3.3779e-01, PNorm = 62.2506, GNorm = 1.5639, lr_0 = 2.2081e-04
Loss = 4.0722e-01, PNorm = 62.2509, GNorm = 1.3904, lr_0 = 2.2066e-04
Loss = 3.6288e-01, PNorm = 62.2516, GNorm = 1.5903, lr_0 = 2.2051e-04
Loss = 3.8999e-01, PNorm = 62.2539, GNorm = 1.4959, lr_0 = 2.2036e-04
Loss = 3.0289e-01, PNorm = 62.2567, GNorm = 1.3356, lr_0 = 2.2021e-04
Loss = 3.4118e-01, PNorm = 62.2620, GNorm = 1.5416, lr_0 = 2.2005e-04
Loss = 3.3619e-01, PNorm = 62.2619, GNorm = 1.6464, lr_0 = 2.1990e-04
Loss = 3.5470e-01, PNorm = 62.2647, GNorm = 1.2478, lr_0 = 2.1975e-04
Loss = 3.3358e-01, PNorm = 62.2668, GNorm = 0.7904, lr_0 = 2.1960e-04
Loss = 3.7982e-01, PNorm = 62.2688, GNorm = 1.5740, lr_0 = 2.1945e-04
Loss = 3.4384e-01, PNorm = 62.2714, GNorm = 2.1933, lr_0 = 2.1930e-04
Loss = 3.2963e-01, PNorm = 62.2768, GNorm = 1.2257, lr_0 = 2.1915e-04
Loss = 3.9193e-01, PNorm = 62.2757, GNorm = 1.4597, lr_0 = 2.1900e-04
Loss = 3.8154e-01, PNorm = 62.2779, GNorm = 1.3333, lr_0 = 2.1885e-04
Loss = 3.4371e-01, PNorm = 62.2828, GNorm = 1.7843, lr_0 = 2.1870e-04
Loss = 3.7607e-01, PNorm = 62.2831, GNorm = 1.6267, lr_0 = 2.1855e-04
Loss = 3.4849e-01, PNorm = 62.2842, GNorm = 1.4788, lr_0 = 2.1840e-04
Loss = 3.4209e-01, PNorm = 62.2871, GNorm = 1.6718, lr_0 = 2.1825e-04
Loss = 3.5439e-01, PNorm = 62.2899, GNorm = 1.3849, lr_0 = 2.1810e-04
Loss = 3.7163e-01, PNorm = 62.2888, GNorm = 1.2992, lr_0 = 2.1795e-04
Loss = 3.7364e-01, PNorm = 62.2947, GNorm = 1.8100, lr_0 = 2.1780e-04
Loss = 3.8763e-01, PNorm = 62.2949, GNorm = 1.4410, lr_0 = 2.1765e-04
Loss = 3.8388e-01, PNorm = 62.2955, GNorm = 1.4798, lr_0 = 2.1751e-04
Loss = 3.6483e-01, PNorm = 62.3017, GNorm = 0.9233, lr_0 = 2.1736e-04
Loss = 3.4475e-01, PNorm = 62.3042, GNorm = 1.5315, lr_0 = 2.1721e-04
Loss = 3.6492e-01, PNorm = 62.3058, GNorm = 1.0669, lr_0 = 2.1706e-04
Loss = 3.6566e-01, PNorm = 62.3082, GNorm = 1.2121, lr_0 = 2.1691e-04
Loss = 3.4012e-01, PNorm = 62.3095, GNorm = 1.1741, lr_0 = 2.1676e-04
Loss = 3.5399e-01, PNorm = 62.3117, GNorm = 1.9599, lr_0 = 2.1661e-04
Loss = 4.3340e-01, PNorm = 62.3150, GNorm = 1.4206, lr_0 = 2.1646e-04
Loss = 3.5596e-01, PNorm = 62.3151, GNorm = 1.5450, lr_0 = 2.1632e-04
Loss = 4.0188e-01, PNorm = 62.3173, GNorm = 1.6433, lr_0 = 2.1617e-04
Loss = 4.1605e-01, PNorm = 62.3182, GNorm = 1.6811, lr_0 = 2.1602e-04
Loss = 3.6101e-01, PNorm = 62.3233, GNorm = 1.5075, lr_0 = 2.1587e-04
Loss = 3.4875e-01, PNorm = 62.3242, GNorm = 1.6562, lr_0 = 2.1572e-04
Loss = 3.2671e-01, PNorm = 62.3241, GNorm = 2.2985, lr_0 = 2.1558e-04
Loss = 3.4071e-01, PNorm = 62.3264, GNorm = 1.3610, lr_0 = 2.1543e-04
Loss = 3.1991e-01, PNorm = 62.3306, GNorm = 1.1767, lr_0 = 2.1528e-04
Loss = 3.3339e-01, PNorm = 62.3300, GNorm = 1.4401, lr_0 = 2.1513e-04
Loss = 3.4762e-01, PNorm = 62.3295, GNorm = 1.4525, lr_0 = 2.1499e-04
Loss = 3.5245e-01, PNorm = 62.3318, GNorm = 1.3935, lr_0 = 2.1484e-04
Loss = 3.7823e-01, PNorm = 62.3329, GNorm = 1.3510, lr_0 = 2.1469e-04
Loss = 3.7105e-01, PNorm = 62.3362, GNorm = 1.4723, lr_0 = 2.1454e-04
Loss = 3.6627e-01, PNorm = 62.3378, GNorm = 1.3751, lr_0 = 2.1440e-04
Loss = 4.2797e-01, PNorm = 62.3373, GNorm = 2.3535, lr_0 = 2.1425e-04
Loss = 3.3222e-01, PNorm = 62.3404, GNorm = 1.3941, lr_0 = 2.1410e-04
Loss = 4.5558e-01, PNorm = 62.3431, GNorm = 1.4996, lr_0 = 2.1396e-04
Loss = 4.0425e-01, PNorm = 62.3482, GNorm = 1.2769, lr_0 = 2.1381e-04
Loss = 3.7651e-01, PNorm = 62.3548, GNorm = 1.8138, lr_0 = 2.1366e-04
Loss = 4.1190e-01, PNorm = 62.3553, GNorm = 1.9248, lr_0 = 2.1352e-04
Loss = 3.6051e-01, PNorm = 62.3542, GNorm = 1.3310, lr_0 = 2.1337e-04
Loss = 3.1448e-01, PNorm = 62.3562, GNorm = 1.4006, lr_0 = 2.1323e-04
Loss = 3.0578e-01, PNorm = 62.3577, GNorm = 1.6351, lr_0 = 2.1308e-04
Loss = 3.6856e-01, PNorm = 62.3612, GNorm = 1.3812, lr_0 = 2.1293e-04
Loss = 3.0323e-01, PNorm = 62.3630, GNorm = 0.9969, lr_0 = 2.1279e-04
Loss = 4.1973e-01, PNorm = 62.3648, GNorm = 1.5067, lr_0 = 2.1264e-04
Loss = 3.5924e-01, PNorm = 62.3673, GNorm = 1.8286, lr_0 = 2.1250e-04
Loss = 3.8440e-01, PNorm = 62.3720, GNorm = 1.3831, lr_0 = 2.1235e-04
Loss = 3.3677e-01, PNorm = 62.3743, GNorm = 1.3169, lr_0 = 2.1221e-04
Loss = 3.5093e-01, PNorm = 62.3759, GNorm = 1.0528, lr_0 = 2.1206e-04
Loss = 3.2032e-01, PNorm = 62.3768, GNorm = 1.3047, lr_0 = 2.1191e-04
Loss = 3.8861e-01, PNorm = 62.3812, GNorm = 1.3527, lr_0 = 2.1177e-04
Loss = 3.3996e-01, PNorm = 62.3841, GNorm = 1.3346, lr_0 = 2.1162e-04
Loss = 3.6198e-01, PNorm = 62.3869, GNorm = 1.2675, lr_0 = 2.1148e-04
Loss = 3.5550e-01, PNorm = 62.3903, GNorm = 1.1748, lr_0 = 2.1133e-04
Loss = 4.0084e-01, PNorm = 62.3910, GNorm = 1.3910, lr_0 = 2.1119e-04
Loss = 3.6644e-01, PNorm = 62.3938, GNorm = 1.3710, lr_0 = 2.1104e-04
Loss = 4.1353e-01, PNorm = 62.3990, GNorm = 2.0491, lr_0 = 2.1090e-04
Loss = 3.5388e-01, PNorm = 62.4011, GNorm = 1.3749, lr_0 = 2.1076e-04
Loss = 3.3075e-01, PNorm = 62.3979, GNorm = 1.2977, lr_0 = 2.1061e-04
Loss = 3.7816e-01, PNorm = 62.3980, GNorm = 1.7084, lr_0 = 2.1047e-04
Loss = 3.8633e-01, PNorm = 62.4028, GNorm = 1.6323, lr_0 = 2.1032e-04
Loss = 4.1182e-01, PNorm = 62.4046, GNorm = 2.3912, lr_0 = 2.1018e-04
Loss = 4.0063e-01, PNorm = 62.4080, GNorm = 2.5356, lr_0 = 2.1003e-04
Loss = 3.5029e-01, PNorm = 62.4109, GNorm = 1.0974, lr_0 = 2.0989e-04
Loss = 3.3109e-01, PNorm = 62.4139, GNorm = 1.3778, lr_0 = 2.0975e-04
Loss = 3.1767e-01, PNorm = 62.4178, GNorm = 1.4949, lr_0 = 2.0960e-04
Validation mae = 0.112110
Epoch 21
Loss = 3.4433e-01, PNorm = 62.4173, GNorm = 1.1779, lr_0 = 2.0946e-04
Loss = 3.9856e-01, PNorm = 62.4196, GNorm = 1.7734, lr_0 = 2.0932e-04
Loss = 3.6370e-01, PNorm = 62.4213, GNorm = 1.3891, lr_0 = 2.0917e-04
Loss = 3.6181e-01, PNorm = 62.4230, GNorm = 1.0558, lr_0 = 2.0903e-04
Loss = 3.4100e-01, PNorm = 62.4284, GNorm = 1.6596, lr_0 = 2.0889e-04
Loss = 2.8407e-01, PNorm = 62.4298, GNorm = 1.0988, lr_0 = 2.0874e-04
Loss = 3.5460e-01, PNorm = 62.4333, GNorm = 1.6984, lr_0 = 2.0860e-04
Loss = 3.3534e-01, PNorm = 62.4356, GNorm = 1.4758, lr_0 = 2.0846e-04
Loss = 4.1035e-01, PNorm = 62.4365, GNorm = 1.3111, lr_0 = 2.0831e-04
Loss = 3.8217e-01, PNorm = 62.4375, GNorm = 1.6342, lr_0 = 2.0817e-04
Loss = 3.3873e-01, PNorm = 62.4399, GNorm = 1.4091, lr_0 = 2.0803e-04
Loss = 4.3174e-01, PNorm = 62.4437, GNorm = 1.4220, lr_0 = 2.0789e-04
Loss = 3.3600e-01, PNorm = 62.4457, GNorm = 1.3321, lr_0 = 2.0774e-04
Loss = 3.3167e-01, PNorm = 62.4497, GNorm = 1.6692, lr_0 = 2.0760e-04
Loss = 3.4443e-01, PNorm = 62.4511, GNorm = 1.1992, lr_0 = 2.0746e-04
Loss = 3.5224e-01, PNorm = 62.4554, GNorm = 1.3814, lr_0 = 2.0732e-04
Loss = 4.1889e-01, PNorm = 62.4598, GNorm = 1.5216, lr_0 = 2.0718e-04
Loss = 3.4003e-01, PNorm = 62.4621, GNorm = 1.7462, lr_0 = 2.0703e-04
Loss = 3.4948e-01, PNorm = 62.4632, GNorm = 1.1275, lr_0 = 2.0689e-04
Loss = 3.4996e-01, PNorm = 62.4631, GNorm = 1.4852, lr_0 = 2.0675e-04
Loss = 3.5129e-01, PNorm = 62.4632, GNorm = 2.1339, lr_0 = 2.0661e-04
Loss = 3.2725e-01, PNorm = 62.4648, GNorm = 1.4174, lr_0 = 2.0647e-04
Loss = 3.2908e-01, PNorm = 62.4683, GNorm = 1.4677, lr_0 = 2.0633e-04
Loss = 3.4624e-01, PNorm = 62.4712, GNorm = 1.6804, lr_0 = 2.0618e-04
Loss = 3.8575e-01, PNorm = 62.4721, GNorm = 2.1205, lr_0 = 2.0604e-04
Loss = 3.3603e-01, PNorm = 62.4739, GNorm = 1.1218, lr_0 = 2.0590e-04
Loss = 3.8074e-01, PNorm = 62.4771, GNorm = 2.2614, lr_0 = 2.0576e-04
Loss = 3.9026e-01, PNorm = 62.4773, GNorm = 1.3657, lr_0 = 2.0562e-04
Loss = 3.8093e-01, PNorm = 62.4793, GNorm = 1.8110, lr_0 = 2.0548e-04
Loss = 3.2300e-01, PNorm = 62.4826, GNorm = 1.1986, lr_0 = 2.0534e-04
Loss = 3.6744e-01, PNorm = 62.4848, GNorm = 1.2696, lr_0 = 2.0520e-04
Loss = 3.9059e-01, PNorm = 62.4858, GNorm = 1.1560, lr_0 = 2.0506e-04
Loss = 3.8450e-01, PNorm = 62.4890, GNorm = 1.7373, lr_0 = 2.0492e-04
Loss = 3.8350e-01, PNorm = 62.4899, GNorm = 1.4736, lr_0 = 2.0478e-04
Loss = 3.6949e-01, PNorm = 62.4909, GNorm = 1.0827, lr_0 = 2.0464e-04
Loss = 3.6559e-01, PNorm = 62.4934, GNorm = 1.8324, lr_0 = 2.0450e-04
Loss = 3.7269e-01, PNorm = 62.4963, GNorm = 1.5605, lr_0 = 2.0436e-04
Loss = 3.7499e-01, PNorm = 62.4952, GNorm = 1.3391, lr_0 = 2.0422e-04
Loss = 4.1801e-01, PNorm = 62.4979, GNorm = 1.2091, lr_0 = 2.0408e-04
Loss = 3.5071e-01, PNorm = 62.5028, GNorm = 1.4454, lr_0 = 2.0394e-04
Loss = 3.4990e-01, PNorm = 62.5038, GNorm = 1.4973, lr_0 = 2.0380e-04
Loss = 3.5092e-01, PNorm = 62.5061, GNorm = 1.2454, lr_0 = 2.0366e-04
Loss = 3.0974e-01, PNorm = 62.5087, GNorm = 1.2393, lr_0 = 2.0352e-04
Loss = 3.9436e-01, PNorm = 62.5069, GNorm = 2.3734, lr_0 = 2.0338e-04
Loss = 3.7194e-01, PNorm = 62.5078, GNorm = 1.2970, lr_0 = 2.0324e-04
Loss = 3.7673e-01, PNorm = 62.5113, GNorm = 0.8977, lr_0 = 2.0310e-04
Loss = 3.8676e-01, PNorm = 62.5150, GNorm = 1.7250, lr_0 = 2.0296e-04
Loss = 4.0539e-01, PNorm = 62.5194, GNorm = 1.6691, lr_0 = 2.0282e-04
Loss = 3.7955e-01, PNorm = 62.5194, GNorm = 1.6249, lr_0 = 2.0268e-04
Loss = 3.6453e-01, PNorm = 62.5197, GNorm = 1.4717, lr_0 = 2.0254e-04
Loss = 3.3171e-01, PNorm = 62.5232, GNorm = 1.4744, lr_0 = 2.0240e-04
Loss = 3.5695e-01, PNorm = 62.5278, GNorm = 1.0951, lr_0 = 2.0227e-04
Loss = 3.9717e-01, PNorm = 62.5321, GNorm = 1.4746, lr_0 = 2.0213e-04
Loss = 3.4394e-01, PNorm = 62.5321, GNorm = 1.8788, lr_0 = 2.0199e-04
Loss = 3.3050e-01, PNorm = 62.5346, GNorm = 1.5530, lr_0 = 2.0185e-04
Loss = 3.4683e-01, PNorm = 62.5385, GNorm = 1.6679, lr_0 = 2.0171e-04
Loss = 3.5378e-01, PNorm = 62.5400, GNorm = 2.2669, lr_0 = 2.0157e-04
Loss = 3.8888e-01, PNorm = 62.5395, GNorm = 1.4396, lr_0 = 2.0144e-04
Loss = 3.9143e-01, PNorm = 62.5408, GNorm = 1.6010, lr_0 = 2.0130e-04
Loss = 4.1504e-01, PNorm = 62.5440, GNorm = 1.6217, lr_0 = 2.0116e-04
Loss = 3.3774e-01, PNorm = 62.5441, GNorm = 1.2957, lr_0 = 2.0102e-04
Loss = 3.8699e-01, PNorm = 62.5486, GNorm = 1.4306, lr_0 = 2.0088e-04
Loss = 3.3853e-01, PNorm = 62.5506, GNorm = 1.4663, lr_0 = 2.0075e-04
Loss = 3.4800e-01, PNorm = 62.5487, GNorm = 1.3132, lr_0 = 2.0061e-04
Loss = 4.0856e-01, PNorm = 62.5500, GNorm = 1.8143, lr_0 = 2.0047e-04
Loss = 3.4472e-01, PNorm = 62.5523, GNorm = 1.4364, lr_0 = 2.0033e-04
Loss = 3.6948e-01, PNorm = 62.5558, GNorm = 1.6428, lr_0 = 2.0020e-04
Loss = 3.6189e-01, PNorm = 62.5588, GNorm = 1.3391, lr_0 = 2.0006e-04
Loss = 3.7423e-01, PNorm = 62.5607, GNorm = 1.1741, lr_0 = 1.9992e-04
Loss = 3.5007e-01, PNorm = 62.5602, GNorm = 1.1215, lr_0 = 1.9979e-04
Loss = 3.6496e-01, PNorm = 62.5628, GNorm = 1.2577, lr_0 = 1.9965e-04
Loss = 3.7251e-01, PNorm = 62.5664, GNorm = 1.2467, lr_0 = 1.9951e-04
Loss = 3.3766e-01, PNorm = 62.5697, GNorm = 1.7508, lr_0 = 1.9938e-04
Loss = 3.0983e-01, PNorm = 62.5718, GNorm = 1.2334, lr_0 = 1.9924e-04
Loss = 3.6947e-01, PNorm = 62.5714, GNorm = 1.7450, lr_0 = 1.9910e-04
Loss = 3.7712e-01, PNorm = 62.5750, GNorm = 1.6673, lr_0 = 1.9897e-04
Loss = 3.5648e-01, PNorm = 62.5775, GNorm = 1.6189, lr_0 = 1.9883e-04
Loss = 3.6714e-01, PNorm = 62.5803, GNorm = 1.8170, lr_0 = 1.9869e-04
Loss = 3.7321e-01, PNorm = 62.5830, GNorm = 1.4843, lr_0 = 1.9856e-04
Loss = 3.2289e-01, PNorm = 62.5838, GNorm = 1.4061, lr_0 = 1.9842e-04
Loss = 3.4035e-01, PNorm = 62.5835, GNorm = 1.4034, lr_0 = 1.9829e-04
Loss = 3.1840e-01, PNorm = 62.5841, GNorm = 2.1341, lr_0 = 1.9815e-04
Loss = 4.0262e-01, PNorm = 62.5836, GNorm = 1.3112, lr_0 = 1.9801e-04
Loss = 3.3740e-01, PNorm = 62.5887, GNorm = 1.2370, lr_0 = 1.9788e-04
Loss = 3.8920e-01, PNorm = 62.5946, GNorm = 1.1039, lr_0 = 1.9774e-04
Loss = 3.2354e-01, PNorm = 62.5952, GNorm = 2.0385, lr_0 = 1.9761e-04
Loss = 3.4254e-01, PNorm = 62.5936, GNorm = 1.4798, lr_0 = 1.9747e-04
Loss = 3.5936e-01, PNorm = 62.5958, GNorm = 1.6663, lr_0 = 1.9734e-04
Loss = 3.1405e-01, PNorm = 62.5993, GNorm = 2.4380, lr_0 = 1.9720e-04
Loss = 3.2831e-01, PNorm = 62.5995, GNorm = 1.3707, lr_0 = 1.9707e-04
Loss = 3.1953e-01, PNorm = 62.6016, GNorm = 1.7139, lr_0 = 1.9693e-04
Loss = 3.8957e-01, PNorm = 62.6021, GNorm = 1.3509, lr_0 = 1.9680e-04
Loss = 3.6568e-01, PNorm = 62.6015, GNorm = 2.5040, lr_0 = 1.9666e-04
Loss = 3.8956e-01, PNorm = 62.6031, GNorm = 1.5420, lr_0 = 1.9653e-04
Loss = 3.7007e-01, PNorm = 62.6035, GNorm = 1.1651, lr_0 = 1.9639e-04
Loss = 3.6140e-01, PNorm = 62.6073, GNorm = 1.5155, lr_0 = 1.9626e-04
Loss = 3.8783e-01, PNorm = 62.6100, GNorm = 1.6157, lr_0 = 1.9612e-04
Loss = 3.8755e-01, PNorm = 62.6141, GNorm = 1.2513, lr_0 = 1.9599e-04
Loss = 4.1291e-01, PNorm = 62.6184, GNorm = 2.0135, lr_0 = 1.9585e-04
Loss = 3.1137e-01, PNorm = 62.6197, GNorm = 1.2396, lr_0 = 1.9572e-04
Loss = 3.5782e-01, PNorm = 62.6224, GNorm = 1.5796, lr_0 = 1.9559e-04
Loss = 3.7122e-01, PNorm = 62.6269, GNorm = 1.0440, lr_0 = 1.9545e-04
Loss = 3.8903e-01, PNorm = 62.6285, GNorm = 2.4096, lr_0 = 1.9532e-04
Loss = 3.9289e-01, PNorm = 62.6326, GNorm = 1.4813, lr_0 = 1.9518e-04
Loss = 3.5688e-01, PNorm = 62.6345, GNorm = 1.2798, lr_0 = 1.9505e-04
Loss = 3.5650e-01, PNorm = 62.6354, GNorm = 1.2818, lr_0 = 1.9492e-04
Loss = 3.7189e-01, PNorm = 62.6375, GNorm = 1.0998, lr_0 = 1.9478e-04
Loss = 3.3292e-01, PNorm = 62.6416, GNorm = 1.6929, lr_0 = 1.9465e-04
Loss = 3.5922e-01, PNorm = 62.6420, GNorm = 1.1650, lr_0 = 1.9452e-04
Loss = 3.8712e-01, PNorm = 62.6420, GNorm = 3.0548, lr_0 = 1.9438e-04
Loss = 4.2590e-01, PNorm = 62.6459, GNorm = 1.8258, lr_0 = 1.9425e-04
Loss = 3.6683e-01, PNorm = 62.6473, GNorm = 1.4074, lr_0 = 1.9412e-04
Loss = 2.9794e-01, PNorm = 62.6508, GNorm = 1.1777, lr_0 = 1.9398e-04
Loss = 3.4363e-01, PNorm = 62.6547, GNorm = 1.8491, lr_0 = 1.9385e-04
Loss = 3.4407e-01, PNorm = 62.6547, GNorm = 2.3227, lr_0 = 1.9372e-04
Loss = 3.5505e-01, PNorm = 62.6549, GNorm = 2.0977, lr_0 = 1.9359e-04
Loss = 3.5850e-01, PNorm = 62.6572, GNorm = 1.9150, lr_0 = 1.9345e-04
Loss = 3.7763e-01, PNorm = 62.6569, GNorm = 1.3538, lr_0 = 1.9332e-04
Loss = 3.5383e-01, PNorm = 62.6597, GNorm = 1.3275, lr_0 = 1.9319e-04
Loss = 3.4605e-01, PNorm = 62.6618, GNorm = 1.2873, lr_0 = 1.9306e-04
Validation mae = 0.111655
Epoch 22
Loss = 3.4366e-01, PNorm = 62.6635, GNorm = 2.2028, lr_0 = 1.9292e-04
Loss = 3.4833e-01, PNorm = 62.6646, GNorm = 1.3832, lr_0 = 1.9279e-04
Loss = 3.4447e-01, PNorm = 62.6683, GNorm = 1.8705, lr_0 = 1.9266e-04
Loss = 3.6764e-01, PNorm = 62.6703, GNorm = 1.2514, lr_0 = 1.9253e-04
Loss = 3.2322e-01, PNorm = 62.6748, GNorm = 1.4536, lr_0 = 1.9240e-04
Loss = 3.5034e-01, PNorm = 62.6800, GNorm = 1.3557, lr_0 = 1.9226e-04
Loss = 3.3895e-01, PNorm = 62.6838, GNorm = 1.3312, lr_0 = 1.9213e-04
Loss = 3.2200e-01, PNorm = 62.6869, GNorm = 1.2792, lr_0 = 1.9200e-04
Loss = 3.4206e-01, PNorm = 62.6870, GNorm = 1.3252, lr_0 = 1.9187e-04
Loss = 3.8849e-01, PNorm = 62.6880, GNorm = 1.2238, lr_0 = 1.9174e-04
Loss = 3.7882e-01, PNorm = 62.6903, GNorm = 1.6382, lr_0 = 1.9161e-04
Loss = 3.7096e-01, PNorm = 62.6908, GNorm = 1.4720, lr_0 = 1.9148e-04
Loss = 3.6691e-01, PNorm = 62.6931, GNorm = 1.2200, lr_0 = 1.9134e-04
Loss = 3.7071e-01, PNorm = 62.6957, GNorm = 1.9198, lr_0 = 1.9121e-04
Loss = 3.9577e-01, PNorm = 62.7003, GNorm = 1.5373, lr_0 = 1.9108e-04
Loss = 3.8541e-01, PNorm = 62.6998, GNorm = 1.1724, lr_0 = 1.9095e-04
Loss = 3.4230e-01, PNorm = 62.7002, GNorm = 1.4337, lr_0 = 1.9082e-04
Loss = 4.0404e-01, PNorm = 62.7038, GNorm = 1.4591, lr_0 = 1.9069e-04
Loss = 3.4073e-01, PNorm = 62.7074, GNorm = 1.3447, lr_0 = 1.9056e-04
Loss = 3.2018e-01, PNorm = 62.7074, GNorm = 1.3176, lr_0 = 1.9043e-04
Loss = 3.3483e-01, PNorm = 62.7057, GNorm = 2.0523, lr_0 = 1.9030e-04
Loss = 3.8165e-01, PNorm = 62.7090, GNorm = 1.6140, lr_0 = 1.9017e-04
Loss = 3.5892e-01, PNorm = 62.7104, GNorm = 2.0115, lr_0 = 1.9004e-04
Loss = 3.4121e-01, PNorm = 62.7135, GNorm = 2.2707, lr_0 = 1.8991e-04
Loss = 3.8424e-01, PNorm = 62.7166, GNorm = 1.3028, lr_0 = 1.8978e-04
Loss = 3.6787e-01, PNorm = 62.7166, GNorm = 1.1284, lr_0 = 1.8965e-04
Loss = 3.6889e-01, PNorm = 62.7179, GNorm = 1.1459, lr_0 = 1.8952e-04
Loss = 3.6797e-01, PNorm = 62.7194, GNorm = 1.4811, lr_0 = 1.8939e-04
Loss = 3.4399e-01, PNorm = 62.7202, GNorm = 2.4087, lr_0 = 1.8926e-04
Loss = 3.6500e-01, PNorm = 62.7215, GNorm = 1.2796, lr_0 = 1.8913e-04
Loss = 3.2351e-01, PNorm = 62.7234, GNorm = 1.4375, lr_0 = 1.8900e-04
Loss = 3.8347e-01, PNorm = 62.7261, GNorm = 1.5212, lr_0 = 1.8887e-04
Loss = 3.5386e-01, PNorm = 62.7273, GNorm = 1.5627, lr_0 = 1.8874e-04
Loss = 3.2471e-01, PNorm = 62.7318, GNorm = 1.3279, lr_0 = 1.8861e-04
Loss = 3.3679e-01, PNorm = 62.7334, GNorm = 1.2909, lr_0 = 1.8848e-04
Loss = 3.4996e-01, PNorm = 62.7333, GNorm = 1.2907, lr_0 = 1.8835e-04
Loss = 3.4339e-01, PNorm = 62.7337, GNorm = 1.9932, lr_0 = 1.8822e-04
Loss = 3.9004e-01, PNorm = 62.7315, GNorm = 1.4649, lr_0 = 1.8809e-04
Loss = 3.9861e-01, PNorm = 62.7337, GNorm = 1.5704, lr_0 = 1.8797e-04
Loss = 3.7596e-01, PNorm = 62.7364, GNorm = 1.1296, lr_0 = 1.8784e-04
Loss = 3.4706e-01, PNorm = 62.7365, GNorm = 1.3532, lr_0 = 1.8771e-04
Loss = 3.7665e-01, PNorm = 62.7360, GNorm = 2.2519, lr_0 = 1.8758e-04
Loss = 3.7799e-01, PNorm = 62.7369, GNorm = 1.0916, lr_0 = 1.8745e-04
Loss = 3.6676e-01, PNorm = 62.7402, GNorm = 1.4238, lr_0 = 1.8732e-04
Loss = 3.1183e-01, PNorm = 62.7418, GNorm = 1.6196, lr_0 = 1.8719e-04
Loss = 4.0673e-01, PNorm = 62.7422, GNorm = 1.5976, lr_0 = 1.8707e-04
Loss = 3.3275e-01, PNorm = 62.7426, GNorm = 1.2961, lr_0 = 1.8694e-04
Loss = 3.7200e-01, PNorm = 62.7436, GNorm = 1.4732, lr_0 = 1.8681e-04
Loss = 3.8147e-01, PNorm = 62.7443, GNorm = 1.1991, lr_0 = 1.8668e-04
Loss = 3.5132e-01, PNorm = 62.7469, GNorm = 1.8680, lr_0 = 1.8655e-04
Loss = 3.3839e-01, PNorm = 62.7502, GNorm = 1.6522, lr_0 = 1.8643e-04
Loss = 3.8890e-01, PNorm = 62.7513, GNorm = 1.8256, lr_0 = 1.8630e-04
Loss = 3.4150e-01, PNorm = 62.7512, GNorm = 2.2952, lr_0 = 1.8617e-04
Loss = 3.8602e-01, PNorm = 62.7543, GNorm = 1.4028, lr_0 = 1.8604e-04
Loss = 3.1782e-01, PNorm = 62.7563, GNorm = 1.1129, lr_0 = 1.8592e-04
Loss = 3.6785e-01, PNorm = 62.7561, GNorm = 1.2008, lr_0 = 1.8579e-04
Loss = 3.5106e-01, PNorm = 62.7592, GNorm = 1.7735, lr_0 = 1.8566e-04
Loss = 3.6849e-01, PNorm = 62.7615, GNorm = 2.3085, lr_0 = 1.8553e-04
Loss = 3.2614e-01, PNorm = 62.7636, GNorm = 1.1943, lr_0 = 1.8541e-04
Loss = 3.3434e-01, PNorm = 62.7661, GNorm = 1.4332, lr_0 = 1.8528e-04
Loss = 3.7476e-01, PNorm = 62.7684, GNorm = 1.4144, lr_0 = 1.8515e-04
Loss = 3.4779e-01, PNorm = 62.7712, GNorm = 1.4777, lr_0 = 1.8503e-04
Loss = 4.1097e-01, PNorm = 62.7749, GNorm = 1.3714, lr_0 = 1.8490e-04
Loss = 3.7013e-01, PNorm = 62.7753, GNorm = 1.3423, lr_0 = 1.8477e-04
Loss = 3.5400e-01, PNorm = 62.7791, GNorm = 1.3762, lr_0 = 1.8465e-04
Loss = 3.2403e-01, PNorm = 62.7821, GNorm = 1.2930, lr_0 = 1.8452e-04
Loss = 3.6560e-01, PNorm = 62.7826, GNorm = 1.4769, lr_0 = 1.8439e-04
Loss = 3.3097e-01, PNorm = 62.7845, GNorm = 1.2341, lr_0 = 1.8427e-04
Loss = 3.4100e-01, PNorm = 62.7867, GNorm = 1.2881, lr_0 = 1.8414e-04
Loss = 3.3828e-01, PNorm = 62.7893, GNorm = 1.2890, lr_0 = 1.8401e-04
Loss = 3.9766e-01, PNorm = 62.7916, GNorm = 1.7006, lr_0 = 1.8389e-04
Loss = 3.9341e-01, PNorm = 62.7929, GNorm = 1.2847, lr_0 = 1.8376e-04
Loss = 3.7307e-01, PNorm = 62.7954, GNorm = 1.4604, lr_0 = 1.8364e-04
Loss = 3.2998e-01, PNorm = 62.7983, GNorm = 2.0340, lr_0 = 1.8351e-04
Loss = 3.2454e-01, PNorm = 62.7982, GNorm = 1.4043, lr_0 = 1.8338e-04
Loss = 3.7517e-01, PNorm = 62.7999, GNorm = 1.4278, lr_0 = 1.8326e-04
Loss = 3.3707e-01, PNorm = 62.8011, GNorm = 1.4662, lr_0 = 1.8313e-04
Loss = 3.8298e-01, PNorm = 62.8037, GNorm = 1.8734, lr_0 = 1.8301e-04
Loss = 3.6927e-01, PNorm = 62.8050, GNorm = 1.4120, lr_0 = 1.8288e-04
Loss = 3.5478e-01, PNorm = 62.8052, GNorm = 1.1954, lr_0 = 1.8276e-04
Loss = 3.5889e-01, PNorm = 62.8062, GNorm = 1.1610, lr_0 = 1.8263e-04
Loss = 3.9717e-01, PNorm = 62.8082, GNorm = 1.9873, lr_0 = 1.8251e-04
Loss = 4.0352e-01, PNorm = 62.8110, GNorm = 1.2365, lr_0 = 1.8238e-04
Loss = 3.3765e-01, PNorm = 62.8144, GNorm = 1.5316, lr_0 = 1.8226e-04
Loss = 3.0825e-01, PNorm = 62.8163, GNorm = 1.6039, lr_0 = 1.8213e-04
Loss = 3.4312e-01, PNorm = 62.8180, GNorm = 1.2413, lr_0 = 1.8201e-04
Loss = 3.5611e-01, PNorm = 62.8193, GNorm = 1.3073, lr_0 = 1.8188e-04
Loss = 3.9455e-01, PNorm = 62.8205, GNorm = 1.3509, lr_0 = 1.8176e-04
Loss = 3.5280e-01, PNorm = 62.8213, GNorm = 1.6567, lr_0 = 1.8163e-04
Loss = 3.8650e-01, PNorm = 62.8218, GNorm = 1.5006, lr_0 = 1.8151e-04
Loss = 3.7655e-01, PNorm = 62.8245, GNorm = 1.9673, lr_0 = 1.8138e-04
Loss = 3.6242e-01, PNorm = 62.8279, GNorm = 1.3454, lr_0 = 1.8126e-04
Loss = 3.6738e-01, PNorm = 62.8297, GNorm = 2.6096, lr_0 = 1.8114e-04
Loss = 3.1064e-01, PNorm = 62.8337, GNorm = 2.1964, lr_0 = 1.8101e-04
Loss = 3.0684e-01, PNorm = 62.8368, GNorm = 2.1480, lr_0 = 1.8089e-04
Loss = 3.9109e-01, PNorm = 62.8380, GNorm = 1.4159, lr_0 = 1.8076e-04
Loss = 3.5515e-01, PNorm = 62.8424, GNorm = 2.3143, lr_0 = 1.8064e-04
Loss = 3.4607e-01, PNorm = 62.8443, GNorm = 1.1361, lr_0 = 1.8052e-04
Loss = 2.8186e-01, PNorm = 62.8465, GNorm = 1.6831, lr_0 = 1.8039e-04
Loss = 3.1334e-01, PNorm = 62.8478, GNorm = 1.3707, lr_0 = 1.8027e-04
Loss = 3.5027e-01, PNorm = 62.8504, GNorm = 0.9570, lr_0 = 1.8015e-04
Loss = 3.8442e-01, PNorm = 62.8517, GNorm = 2.0237, lr_0 = 1.8002e-04
Loss = 4.2039e-01, PNorm = 62.8541, GNorm = 2.1876, lr_0 = 1.7990e-04
Loss = 3.5821e-01, PNorm = 62.8572, GNorm = 1.3554, lr_0 = 1.7978e-04
Loss = 3.6404e-01, PNorm = 62.8578, GNorm = 2.2885, lr_0 = 1.7965e-04
Loss = 3.4236e-01, PNorm = 62.8587, GNorm = 2.2101, lr_0 = 1.7953e-04
Loss = 3.0730e-01, PNorm = 62.8587, GNorm = 1.0190, lr_0 = 1.7941e-04
Loss = 3.8218e-01, PNorm = 62.8609, GNorm = 1.3008, lr_0 = 1.7928e-04
Loss = 3.2577e-01, PNorm = 62.8636, GNorm = 1.1047, lr_0 = 1.7916e-04
Loss = 3.7630e-01, PNorm = 62.8655, GNorm = 2.7703, lr_0 = 1.7904e-04
Loss = 3.8642e-01, PNorm = 62.8678, GNorm = 1.8694, lr_0 = 1.7892e-04
Loss = 3.5316e-01, PNorm = 62.8707, GNorm = 1.0751, lr_0 = 1.7879e-04
Loss = 3.6776e-01, PNorm = 62.8730, GNorm = 1.9056, lr_0 = 1.7867e-04
Loss = 3.8312e-01, PNorm = 62.8773, GNorm = 1.3740, lr_0 = 1.7855e-04
Loss = 3.0890e-01, PNorm = 62.8798, GNorm = 1.3496, lr_0 = 1.7843e-04
Loss = 3.7594e-01, PNorm = 62.8797, GNorm = 1.4905, lr_0 = 1.7830e-04
Loss = 3.8500e-01, PNorm = 62.8808, GNorm = 1.2795, lr_0 = 1.7818e-04
Loss = 3.3585e-01, PNorm = 62.8831, GNorm = 1.2773, lr_0 = 1.7806e-04
Loss = 3.4765e-01, PNorm = 62.8847, GNorm = 1.8822, lr_0 = 1.7794e-04
Loss = 3.6080e-01, PNorm = 62.8874, GNorm = 1.5707, lr_0 = 1.7782e-04
Validation mae = 0.111961
Epoch 23
Loss = 3.6567e-01, PNorm = 62.8864, GNorm = 1.7121, lr_0 = 1.7769e-04
Loss = 3.3165e-01, PNorm = 62.8868, GNorm = 2.1916, lr_0 = 1.7757e-04
Loss = 3.1109e-01, PNorm = 62.8889, GNorm = 1.1003, lr_0 = 1.7745e-04
Loss = 3.3925e-01, PNorm = 62.8912, GNorm = 1.1706, lr_0 = 1.7733e-04
Loss = 3.4994e-01, PNorm = 62.8914, GNorm = 1.7182, lr_0 = 1.7721e-04
Loss = 2.8429e-01, PNorm = 62.8910, GNorm = 1.2083, lr_0 = 1.7709e-04
Loss = 3.5953e-01, PNorm = 62.8966, GNorm = 1.4756, lr_0 = 1.7696e-04
Loss = 3.2762e-01, PNorm = 62.8989, GNorm = 1.3757, lr_0 = 1.7684e-04
Loss = 3.3858e-01, PNorm = 62.8989, GNorm = 1.3117, lr_0 = 1.7672e-04
Loss = 3.7463e-01, PNorm = 62.8997, GNorm = 1.7088, lr_0 = 1.7660e-04
Loss = 3.5089e-01, PNorm = 62.9018, GNorm = 1.4441, lr_0 = 1.7648e-04
Loss = 3.5128e-01, PNorm = 62.9046, GNorm = 2.6732, lr_0 = 1.7636e-04
Loss = 3.3093e-01, PNorm = 62.9052, GNorm = 1.7234, lr_0 = 1.7624e-04
Loss = 4.0305e-01, PNorm = 62.9076, GNorm = 1.1773, lr_0 = 1.7612e-04
Loss = 3.6228e-01, PNorm = 62.9095, GNorm = 1.2757, lr_0 = 1.7600e-04
Loss = 3.5173e-01, PNorm = 62.9089, GNorm = 1.2181, lr_0 = 1.7588e-04
Loss = 3.2800e-01, PNorm = 62.9120, GNorm = 1.3845, lr_0 = 1.7576e-04
Loss = 4.1474e-01, PNorm = 62.9123, GNorm = 1.3440, lr_0 = 1.7564e-04
Loss = 3.0748e-01, PNorm = 62.9153, GNorm = 1.0109, lr_0 = 1.7552e-04
Loss = 3.8293e-01, PNorm = 62.9171, GNorm = 1.3230, lr_0 = 1.7540e-04
Loss = 3.5316e-01, PNorm = 62.9185, GNorm = 1.3174, lr_0 = 1.7528e-04
Loss = 3.7893e-01, PNorm = 62.9208, GNorm = 1.4971, lr_0 = 1.7516e-04
Loss = 3.3544e-01, PNorm = 62.9237, GNorm = 1.4464, lr_0 = 1.7504e-04
Loss = 3.9421e-01, PNorm = 62.9260, GNorm = 1.8266, lr_0 = 1.7492e-04
Loss = 4.1439e-01, PNorm = 62.9275, GNorm = 5.0552, lr_0 = 1.7480e-04
Loss = 3.5031e-01, PNorm = 62.9287, GNorm = 1.3884, lr_0 = 1.7468e-04
Loss = 3.4424e-01, PNorm = 62.9309, GNorm = 1.2693, lr_0 = 1.7456e-04
Loss = 3.4842e-01, PNorm = 62.9328, GNorm = 1.5025, lr_0 = 1.7444e-04
Loss = 3.6188e-01, PNorm = 62.9339, GNorm = 1.5194, lr_0 = 1.7432e-04
Loss = 3.6248e-01, PNorm = 62.9370, GNorm = 1.8773, lr_0 = 1.7420e-04
Loss = 3.8517e-01, PNorm = 62.9389, GNorm = 1.3880, lr_0 = 1.7408e-04
Loss = 3.3606e-01, PNorm = 62.9413, GNorm = 1.4288, lr_0 = 1.7396e-04
Loss = 3.1679e-01, PNorm = 62.9440, GNorm = 0.8859, lr_0 = 1.7384e-04
Loss = 3.4213e-01, PNorm = 62.9423, GNorm = 1.7102, lr_0 = 1.7372e-04
Loss = 3.7398e-01, PNorm = 62.9433, GNorm = 1.1282, lr_0 = 1.7360e-04
Loss = 3.8808e-01, PNorm = 62.9465, GNorm = 1.7218, lr_0 = 1.7348e-04
Loss = 4.0480e-01, PNorm = 62.9450, GNorm = 1.9868, lr_0 = 1.7336e-04
Loss = 3.6522e-01, PNorm = 62.9457, GNorm = 1.7863, lr_0 = 1.7325e-04
Loss = 3.3361e-01, PNorm = 62.9495, GNorm = 0.9915, lr_0 = 1.7313e-04
Loss = 3.2179e-01, PNorm = 62.9533, GNorm = 1.2165, lr_0 = 1.7301e-04
Loss = 3.3138e-01, PNorm = 62.9555, GNorm = 1.7835, lr_0 = 1.7289e-04
Loss = 3.0574e-01, PNorm = 62.9579, GNorm = 1.7483, lr_0 = 1.7277e-04
Loss = 3.4068e-01, PNorm = 62.9591, GNorm = 1.2676, lr_0 = 1.7265e-04
Loss = 3.5238e-01, PNorm = 62.9605, GNorm = 1.4585, lr_0 = 1.7253e-04
Loss = 3.4631e-01, PNorm = 62.9639, GNorm = 2.2905, lr_0 = 1.7242e-04
Loss = 3.5466e-01, PNorm = 62.9664, GNorm = 1.2599, lr_0 = 1.7230e-04
Loss = 3.8635e-01, PNorm = 62.9657, GNorm = 1.5978, lr_0 = 1.7218e-04
Loss = 3.8976e-01, PNorm = 62.9677, GNorm = 1.5316, lr_0 = 1.7206e-04
Loss = 3.4427e-01, PNorm = 62.9703, GNorm = 1.8496, lr_0 = 1.7194e-04
Loss = 3.3933e-01, PNorm = 62.9733, GNorm = 2.0931, lr_0 = 1.7183e-04
Loss = 3.5254e-01, PNorm = 62.9730, GNorm = 1.8997, lr_0 = 1.7171e-04
Loss = 3.7051e-01, PNorm = 62.9750, GNorm = 1.5448, lr_0 = 1.7159e-04
Loss = 3.5424e-01, PNorm = 62.9770, GNorm = 1.5767, lr_0 = 1.7147e-04
Loss = 3.8699e-01, PNorm = 62.9773, GNorm = 1.9665, lr_0 = 1.7136e-04
Loss = 3.8071e-01, PNorm = 62.9793, GNorm = 1.3874, lr_0 = 1.7124e-04
Loss = 3.8796e-01, PNorm = 62.9805, GNorm = 1.5123, lr_0 = 1.7112e-04
Loss = 3.5220e-01, PNorm = 62.9810, GNorm = 2.7944, lr_0 = 1.7100e-04
Loss = 3.3741e-01, PNorm = 62.9803, GNorm = 1.2986, lr_0 = 1.7089e-04
Loss = 3.4740e-01, PNorm = 62.9823, GNorm = 1.4705, lr_0 = 1.7077e-04
Loss = 3.7434e-01, PNorm = 62.9854, GNorm = 1.3817, lr_0 = 1.7065e-04
Loss = 3.2865e-01, PNorm = 62.9883, GNorm = 1.2305, lr_0 = 1.7054e-04
Loss = 3.9025e-01, PNorm = 62.9869, GNorm = 1.3239, lr_0 = 1.7042e-04
Loss = 3.6389e-01, PNorm = 62.9887, GNorm = 1.5638, lr_0 = 1.7030e-04
Loss = 3.5954e-01, PNorm = 62.9923, GNorm = 1.3813, lr_0 = 1.7019e-04
Loss = 4.1157e-01, PNorm = 62.9941, GNorm = 1.0357, lr_0 = 1.7007e-04
Loss = 3.6310e-01, PNorm = 62.9965, GNorm = 1.8367, lr_0 = 1.6995e-04
Loss = 3.4246e-01, PNorm = 62.9989, GNorm = 1.4143, lr_0 = 1.6984e-04
Loss = 3.1097e-01, PNorm = 63.0005, GNorm = 1.6287, lr_0 = 1.6972e-04
Loss = 3.5544e-01, PNorm = 63.0032, GNorm = 1.7188, lr_0 = 1.6960e-04
Loss = 3.3894e-01, PNorm = 63.0040, GNorm = 1.5173, lr_0 = 1.6949e-04
Loss = 3.1773e-01, PNorm = 63.0064, GNorm = 1.1439, lr_0 = 1.6937e-04
Loss = 3.3638e-01, PNorm = 63.0092, GNorm = 1.7337, lr_0 = 1.6926e-04
Loss = 3.6279e-01, PNorm = 63.0109, GNorm = 1.1179, lr_0 = 1.6914e-04
Loss = 3.4013e-01, PNorm = 63.0096, GNorm = 1.0138, lr_0 = 1.6902e-04
Loss = 3.8564e-01, PNorm = 63.0088, GNorm = 2.2759, lr_0 = 1.6891e-04
Loss = 3.5183e-01, PNorm = 63.0110, GNorm = 1.1695, lr_0 = 1.6879e-04
Loss = 3.6360e-01, PNorm = 63.0132, GNorm = 1.5167, lr_0 = 1.6868e-04
Loss = 3.2468e-01, PNorm = 63.0149, GNorm = 1.4127, lr_0 = 1.6856e-04
Loss = 3.8818e-01, PNorm = 63.0180, GNorm = 1.4515, lr_0 = 1.6845e-04
Loss = 3.5248e-01, PNorm = 63.0201, GNorm = 1.5086, lr_0 = 1.6833e-04
Loss = 3.3749e-01, PNorm = 63.0222, GNorm = 1.4824, lr_0 = 1.6821e-04
Loss = 4.0276e-01, PNorm = 63.0235, GNorm = 1.5810, lr_0 = 1.6810e-04
Loss = 3.3606e-01, PNorm = 63.0277, GNorm = 1.0941, lr_0 = 1.6798e-04
Loss = 3.3300e-01, PNorm = 63.0308, GNorm = 1.5316, lr_0 = 1.6787e-04
Loss = 4.2192e-01, PNorm = 63.0304, GNorm = 1.6187, lr_0 = 1.6775e-04
Loss = 3.7417e-01, PNorm = 63.0312, GNorm = 1.3623, lr_0 = 1.6764e-04
Loss = 3.5578e-01, PNorm = 63.0321, GNorm = 1.4588, lr_0 = 1.6752e-04
Loss = 3.6464e-01, PNorm = 63.0338, GNorm = 1.1474, lr_0 = 1.6741e-04
Loss = 3.5863e-01, PNorm = 63.0350, GNorm = 0.9893, lr_0 = 1.6729e-04
Loss = 3.4177e-01, PNorm = 63.0382, GNorm = 1.8424, lr_0 = 1.6718e-04
Loss = 3.9138e-01, PNorm = 63.0391, GNorm = 1.3074, lr_0 = 1.6707e-04
Loss = 4.0555e-01, PNorm = 63.0429, GNorm = 1.8156, lr_0 = 1.6695e-04
Loss = 3.5930e-01, PNorm = 63.0480, GNorm = 1.0908, lr_0 = 1.6684e-04
Loss = 3.3142e-01, PNorm = 63.0500, GNorm = 1.2854, lr_0 = 1.6672e-04
Loss = 3.1649e-01, PNorm = 63.0531, GNorm = 1.2303, lr_0 = 1.6661e-04
Loss = 4.5003e-01, PNorm = 63.0521, GNorm = 1.7805, lr_0 = 1.6649e-04
Loss = 3.6447e-01, PNorm = 63.0507, GNorm = 1.5937, lr_0 = 1.6638e-04
Loss = 3.6299e-01, PNorm = 63.0535, GNorm = 1.6114, lr_0 = 1.6627e-04
Loss = 3.6753e-01, PNorm = 63.0563, GNorm = 1.5667, lr_0 = 1.6615e-04
Loss = 3.4093e-01, PNorm = 63.0605, GNorm = 0.9212, lr_0 = 1.6604e-04
Loss = 3.7114e-01, PNorm = 63.0630, GNorm = 1.5723, lr_0 = 1.6592e-04
Loss = 3.2965e-01, PNorm = 63.0648, GNorm = 1.2186, lr_0 = 1.6581e-04
Loss = 3.1722e-01, PNorm = 63.0667, GNorm = 1.1400, lr_0 = 1.6570e-04
Loss = 3.6381e-01, PNorm = 63.0667, GNorm = 1.3388, lr_0 = 1.6558e-04
Loss = 3.5823e-01, PNorm = 63.0665, GNorm = 1.1300, lr_0 = 1.6547e-04
Loss = 3.1071e-01, PNorm = 63.0659, GNorm = 1.6219, lr_0 = 1.6536e-04
Loss = 3.8543e-01, PNorm = 63.0671, GNorm = 1.8468, lr_0 = 1.6524e-04
Loss = 3.6533e-01, PNorm = 63.0709, GNorm = 1.3950, lr_0 = 1.6513e-04
Loss = 3.3803e-01, PNorm = 63.0731, GNorm = 1.2821, lr_0 = 1.6502e-04
Loss = 3.4693e-01, PNorm = 63.0752, GNorm = 1.7857, lr_0 = 1.6490e-04
Loss = 2.9891e-01, PNorm = 63.0776, GNorm = 1.0861, lr_0 = 1.6479e-04
Loss = 3.6739e-01, PNorm = 63.0784, GNorm = 1.5034, lr_0 = 1.6468e-04
Loss = 3.4463e-01, PNorm = 63.0771, GNorm = 1.3849, lr_0 = 1.6457e-04
Loss = 3.4593e-01, PNorm = 63.0781, GNorm = 1.6796, lr_0 = 1.6445e-04
Loss = 3.6825e-01, PNorm = 63.0801, GNorm = 1.8617, lr_0 = 1.6434e-04
Loss = 3.4497e-01, PNorm = 63.0838, GNorm = 1.2075, lr_0 = 1.6423e-04
Loss = 3.9272e-01, PNorm = 63.0865, GNorm = 1.7179, lr_0 = 1.6412e-04
Loss = 3.7798e-01, PNorm = 63.0883, GNorm = 1.8623, lr_0 = 1.6400e-04
Loss = 3.6453e-01, PNorm = 63.0895, GNorm = 1.9211, lr_0 = 1.6389e-04
Loss = 2.9839e-01, PNorm = 63.0916, GNorm = 1.2674, lr_0 = 1.6378e-04
Validation mae = 0.111437
Epoch 24
Loss = 3.1543e-01, PNorm = 63.0943, GNorm = 1.6177, lr_0 = 1.6367e-04
Loss = 3.6303e-01, PNorm = 63.0943, GNorm = 1.4728, lr_0 = 1.6355e-04
Loss = 3.1322e-01, PNorm = 63.0945, GNorm = 1.2678, lr_0 = 1.6344e-04
Loss = 3.5276e-01, PNorm = 63.0964, GNorm = 1.3847, lr_0 = 1.6333e-04
Loss = 3.6776e-01, PNorm = 63.0992, GNorm = 1.5238, lr_0 = 1.6322e-04
Loss = 3.3818e-01, PNorm = 63.1029, GNorm = 1.4562, lr_0 = 1.6311e-04
Loss = 3.4398e-01, PNorm = 63.1042, GNorm = 1.2910, lr_0 = 1.6299e-04
Loss = 3.3936e-01, PNorm = 63.1057, GNorm = 1.2177, lr_0 = 1.6288e-04
Loss = 3.2843e-01, PNorm = 63.1073, GNorm = 1.8381, lr_0 = 1.6277e-04
Loss = 3.0141e-01, PNorm = 63.1073, GNorm = 1.1357, lr_0 = 1.6266e-04
Loss = 4.0795e-01, PNorm = 63.1065, GNorm = 1.7433, lr_0 = 1.6255e-04
Loss = 3.3006e-01, PNorm = 63.1087, GNorm = 1.1439, lr_0 = 1.6244e-04
Loss = 3.3187e-01, PNorm = 63.1108, GNorm = 1.3624, lr_0 = 1.6233e-04
Loss = 3.5296e-01, PNorm = 63.1119, GNorm = 2.0231, lr_0 = 1.6221e-04
Loss = 3.6079e-01, PNorm = 63.1130, GNorm = 1.6831, lr_0 = 1.6210e-04
Loss = 3.6233e-01, PNorm = 63.1161, GNorm = 1.8869, lr_0 = 1.6199e-04
Loss = 3.7901e-01, PNorm = 63.1161, GNorm = 1.4112, lr_0 = 1.6188e-04
Loss = 3.7079e-01, PNorm = 63.1139, GNorm = 1.4692, lr_0 = 1.6177e-04
Loss = 3.4135e-01, PNorm = 63.1149, GNorm = 1.4229, lr_0 = 1.6166e-04
Loss = 3.5781e-01, PNorm = 63.1165, GNorm = 1.7179, lr_0 = 1.6155e-04
Loss = 3.7446e-01, PNorm = 63.1176, GNorm = 1.6173, lr_0 = 1.6144e-04
Loss = 3.7591e-01, PNorm = 63.1202, GNorm = 1.5616, lr_0 = 1.6133e-04
Loss = 3.2589e-01, PNorm = 63.1221, GNorm = 1.1928, lr_0 = 1.6122e-04
Loss = 3.7094e-01, PNorm = 63.1245, GNorm = 1.1481, lr_0 = 1.6111e-04
Loss = 3.1406e-01, PNorm = 63.1249, GNorm = 1.4830, lr_0 = 1.6100e-04
Loss = 3.4805e-01, PNorm = 63.1263, GNorm = 1.3669, lr_0 = 1.6089e-04
Loss = 3.6855e-01, PNorm = 63.1287, GNorm = 1.5658, lr_0 = 1.6078e-04
Loss = 3.3886e-01, PNorm = 63.1301, GNorm = 1.1633, lr_0 = 1.6067e-04
Loss = 3.3894e-01, PNorm = 63.1340, GNorm = 1.9296, lr_0 = 1.6056e-04
Loss = 3.0571e-01, PNorm = 63.1380, GNorm = 1.5428, lr_0 = 1.6045e-04
Loss = 3.3361e-01, PNorm = 63.1379, GNorm = 1.4205, lr_0 = 1.6034e-04
Loss = 3.4118e-01, PNorm = 63.1379, GNorm = 1.7799, lr_0 = 1.6023e-04
Loss = 3.5560e-01, PNorm = 63.1403, GNorm = 1.9903, lr_0 = 1.6012e-04
Loss = 3.2782e-01, PNorm = 63.1393, GNorm = 1.0480, lr_0 = 1.6001e-04
Loss = 3.7619e-01, PNorm = 63.1416, GNorm = 2.0965, lr_0 = 1.5990e-04
Loss = 3.7297e-01, PNorm = 63.1431, GNorm = 1.4685, lr_0 = 1.5979e-04
Loss = 3.4931e-01, PNorm = 63.1425, GNorm = 1.4855, lr_0 = 1.5968e-04
Loss = 3.4278e-01, PNorm = 63.1431, GNorm = 1.8085, lr_0 = 1.5957e-04
Loss = 3.4334e-01, PNorm = 63.1446, GNorm = 1.3801, lr_0 = 1.5946e-04
Loss = 3.7124e-01, PNorm = 63.1450, GNorm = 1.7433, lr_0 = 1.5935e-04
Loss = 3.6159e-01, PNorm = 63.1464, GNorm = 0.9729, lr_0 = 1.5924e-04
Loss = 3.6300e-01, PNorm = 63.1475, GNorm = 2.0501, lr_0 = 1.5913e-04
Loss = 3.0784e-01, PNorm = 63.1479, GNorm = 1.2698, lr_0 = 1.5902e-04
Loss = 3.4351e-01, PNorm = 63.1519, GNorm = 1.4495, lr_0 = 1.5891e-04
Loss = 3.5334e-01, PNorm = 63.1529, GNorm = 1.4521, lr_0 = 1.5880e-04
Loss = 4.0362e-01, PNorm = 63.1528, GNorm = 1.5388, lr_0 = 1.5870e-04
Loss = 3.4588e-01, PNorm = 63.1553, GNorm = 1.3033, lr_0 = 1.5859e-04
Loss = 3.3863e-01, PNorm = 63.1578, GNorm = 1.1606, lr_0 = 1.5848e-04
Loss = 3.5993e-01, PNorm = 63.1604, GNorm = 1.7368, lr_0 = 1.5837e-04
Loss = 3.8482e-01, PNorm = 63.1632, GNorm = 1.8512, lr_0 = 1.5826e-04
Loss = 3.5428e-01, PNorm = 63.1669, GNorm = 1.5955, lr_0 = 1.5815e-04
Loss = 3.0528e-01, PNorm = 63.1697, GNorm = 1.1396, lr_0 = 1.5804e-04
Loss = 3.2664e-01, PNorm = 63.1701, GNorm = 1.1239, lr_0 = 1.5794e-04
Loss = 3.3446e-01, PNorm = 63.1715, GNorm = 1.6396, lr_0 = 1.5783e-04
Loss = 3.1877e-01, PNorm = 63.1739, GNorm = 1.3168, lr_0 = 1.5772e-04
Loss = 3.5312e-01, PNorm = 63.1769, GNorm = 2.2331, lr_0 = 1.5761e-04
Loss = 3.3464e-01, PNorm = 63.1793, GNorm = 2.1808, lr_0 = 1.5750e-04
Loss = 3.9516e-01, PNorm = 63.1841, GNorm = 1.2558, lr_0 = 1.5740e-04
Loss = 3.9070e-01, PNorm = 63.1880, GNorm = 1.3402, lr_0 = 1.5729e-04
Loss = 3.4144e-01, PNorm = 63.1883, GNorm = 1.6576, lr_0 = 1.5718e-04
Loss = 3.6610e-01, PNorm = 63.1885, GNorm = 1.7255, lr_0 = 1.5707e-04
Loss = 3.4995e-01, PNorm = 63.1903, GNorm = 1.4617, lr_0 = 1.5697e-04
Loss = 3.9913e-01, PNorm = 63.1899, GNorm = 1.6648, lr_0 = 1.5686e-04
Loss = 4.0290e-01, PNorm = 63.1916, GNorm = 1.7297, lr_0 = 1.5675e-04
Loss = 3.3467e-01, PNorm = 63.1940, GNorm = 1.0265, lr_0 = 1.5664e-04
Loss = 3.7862e-01, PNorm = 63.1962, GNorm = 1.0025, lr_0 = 1.5654e-04
Loss = 3.7890e-01, PNorm = 63.1953, GNorm = 0.9425, lr_0 = 1.5643e-04
Loss = 3.4594e-01, PNorm = 63.1972, GNorm = 1.1017, lr_0 = 1.5632e-04
Loss = 3.8766e-01, PNorm = 63.2010, GNorm = 1.7359, lr_0 = 1.5621e-04
Loss = 3.3005e-01, PNorm = 63.2035, GNorm = 1.9579, lr_0 = 1.5611e-04
Loss = 3.2148e-01, PNorm = 63.2044, GNorm = 1.5288, lr_0 = 1.5600e-04
Loss = 3.4557e-01, PNorm = 63.2052, GNorm = 2.5901, lr_0 = 1.5589e-04
Loss = 3.6721e-01, PNorm = 63.2071, GNorm = 1.6849, lr_0 = 1.5579e-04
Loss = 3.4670e-01, PNorm = 63.2109, GNorm = 1.7345, lr_0 = 1.5568e-04
Loss = 3.3209e-01, PNorm = 63.2114, GNorm = 1.5090, lr_0 = 1.5557e-04
Loss = 3.4399e-01, PNorm = 63.2126, GNorm = 1.5906, lr_0 = 1.5547e-04
Loss = 3.6432e-01, PNorm = 63.2104, GNorm = 1.3148, lr_0 = 1.5536e-04
Loss = 3.6420e-01, PNorm = 63.2127, GNorm = 1.7740, lr_0 = 1.5525e-04
Loss = 3.3548e-01, PNorm = 63.2149, GNorm = 1.9339, lr_0 = 1.5515e-04
Loss = 3.2626e-01, PNorm = 63.2145, GNorm = 2.0708, lr_0 = 1.5504e-04
Loss = 4.1257e-01, PNorm = 63.2158, GNorm = 1.3358, lr_0 = 1.5493e-04
Loss = 3.3429e-01, PNorm = 63.2182, GNorm = 1.2725, lr_0 = 1.5483e-04
Loss = 3.8603e-01, PNorm = 63.2187, GNorm = 1.3183, lr_0 = 1.5472e-04
Loss = 3.7100e-01, PNorm = 63.2190, GNorm = 1.5569, lr_0 = 1.5462e-04
Loss = 3.9195e-01, PNorm = 63.2212, GNorm = 1.4185, lr_0 = 1.5451e-04
Loss = 3.0130e-01, PNorm = 63.2247, GNorm = 2.2754, lr_0 = 1.5440e-04
Loss = 3.5150e-01, PNorm = 63.2241, GNorm = 1.8081, lr_0 = 1.5430e-04
Loss = 3.4176e-01, PNorm = 63.2269, GNorm = 1.1848, lr_0 = 1.5419e-04
Loss = 3.1679e-01, PNorm = 63.2289, GNorm = 1.2866, lr_0 = 1.5409e-04
Loss = 3.1964e-01, PNorm = 63.2291, GNorm = 1.1986, lr_0 = 1.5398e-04
Loss = 3.5815e-01, PNorm = 63.2289, GNorm = 1.6167, lr_0 = 1.5388e-04
Loss = 3.0552e-01, PNorm = 63.2325, GNorm = 1.8067, lr_0 = 1.5377e-04
Loss = 3.6418e-01, PNorm = 63.2363, GNorm = 1.2559, lr_0 = 1.5367e-04
Loss = 3.7686e-01, PNorm = 63.2374, GNorm = 1.4562, lr_0 = 1.5356e-04
Loss = 4.0037e-01, PNorm = 63.2405, GNorm = 1.5452, lr_0 = 1.5346e-04
Loss = 3.6439e-01, PNorm = 63.2422, GNorm = 1.8889, lr_0 = 1.5335e-04
Loss = 3.6416e-01, PNorm = 63.2441, GNorm = 1.7180, lr_0 = 1.5325e-04
Loss = 3.2627e-01, PNorm = 63.2457, GNorm = 1.4060, lr_0 = 1.5314e-04
Loss = 3.4580e-01, PNorm = 63.2463, GNorm = 2.1269, lr_0 = 1.5304e-04
Loss = 3.6374e-01, PNorm = 63.2454, GNorm = 1.4761, lr_0 = 1.5293e-04
Loss = 3.7128e-01, PNorm = 63.2462, GNorm = 1.4509, lr_0 = 1.5283e-04
Loss = 3.7315e-01, PNorm = 63.2480, GNorm = 1.5384, lr_0 = 1.5272e-04
Loss = 3.1488e-01, PNorm = 63.2514, GNorm = 1.7478, lr_0 = 1.5262e-04
Loss = 3.5569e-01, PNorm = 63.2524, GNorm = 1.7246, lr_0 = 1.5251e-04
Loss = 3.3895e-01, PNorm = 63.2533, GNorm = 1.6225, lr_0 = 1.5241e-04
Loss = 3.5855e-01, PNorm = 63.2531, GNorm = 1.0711, lr_0 = 1.5230e-04
Loss = 3.8727e-01, PNorm = 63.2540, GNorm = 1.5697, lr_0 = 1.5220e-04
Loss = 3.5939e-01, PNorm = 63.2564, GNorm = 2.5435, lr_0 = 1.5209e-04
Loss = 3.3623e-01, PNorm = 63.2582, GNorm = 1.7355, lr_0 = 1.5199e-04
Loss = 3.6714e-01, PNorm = 63.2589, GNorm = 2.1714, lr_0 = 1.5189e-04
Loss = 3.0893e-01, PNorm = 63.2612, GNorm = 1.4234, lr_0 = 1.5178e-04
Loss = 3.3976e-01, PNorm = 63.2629, GNorm = 1.2308, lr_0 = 1.5168e-04
Loss = 3.4147e-01, PNorm = 63.2647, GNorm = 1.4895, lr_0 = 1.5157e-04
Loss = 3.2345e-01, PNorm = 63.2652, GNorm = 1.6943, lr_0 = 1.5147e-04
Loss = 3.7708e-01, PNorm = 63.2650, GNorm = 2.4801, lr_0 = 1.5137e-04
Loss = 3.8398e-01, PNorm = 63.2691, GNorm = 1.5270, lr_0 = 1.5126e-04
Loss = 3.7948e-01, PNorm = 63.2686, GNorm = 1.5587, lr_0 = 1.5116e-04
Loss = 3.4327e-01, PNorm = 63.2698, GNorm = 1.6111, lr_0 = 1.5106e-04
Loss = 3.2741e-01, PNorm = 63.2694, GNorm = 1.6904, lr_0 = 1.5095e-04
Loss = 3.5905e-01, PNorm = 63.2698, GNorm = 1.5189, lr_0 = 1.5085e-04
Validation mae = 0.111416
Epoch 25
Loss = 3.6122e-01, PNorm = 63.2712, GNorm = 1.7890, lr_0 = 1.5075e-04
Loss = 3.7411e-01, PNorm = 63.2733, GNorm = 1.5670, lr_0 = 1.5064e-04
Loss = 3.4218e-01, PNorm = 63.2744, GNorm = 1.2315, lr_0 = 1.5054e-04
Loss = 3.0395e-01, PNorm = 63.2740, GNorm = 1.3811, lr_0 = 1.5044e-04
Loss = 2.9261e-01, PNorm = 63.2737, GNorm = 1.6000, lr_0 = 1.5033e-04
Loss = 3.6211e-01, PNorm = 63.2766, GNorm = 1.2096, lr_0 = 1.5023e-04
Loss = 3.7339e-01, PNorm = 63.2789, GNorm = 1.1872, lr_0 = 1.5013e-04
Loss = 3.6384e-01, PNorm = 63.2821, GNorm = 1.2968, lr_0 = 1.5002e-04
Loss = 3.2177e-01, PNorm = 63.2848, GNorm = 1.5667, lr_0 = 1.4992e-04
Loss = 3.4307e-01, PNorm = 63.2864, GNorm = 1.5021, lr_0 = 1.4982e-04
Loss = 3.5933e-01, PNorm = 63.2878, GNorm = 2.2976, lr_0 = 1.4972e-04
Loss = 3.2040e-01, PNorm = 63.2900, GNorm = 1.8334, lr_0 = 1.4961e-04
Loss = 3.2933e-01, PNorm = 63.2904, GNorm = 1.5704, lr_0 = 1.4951e-04
Loss = 3.9482e-01, PNorm = 63.2886, GNorm = 2.2177, lr_0 = 1.4941e-04
Loss = 3.1927e-01, PNorm = 63.2891, GNorm = 1.4434, lr_0 = 1.4931e-04
Loss = 3.0805e-01, PNorm = 63.2909, GNorm = 1.4408, lr_0 = 1.4920e-04
Loss = 3.1604e-01, PNorm = 63.2928, GNorm = 1.6850, lr_0 = 1.4910e-04
Loss = 3.7559e-01, PNorm = 63.2948, GNorm = 1.5439, lr_0 = 1.4900e-04
Loss = 3.4246e-01, PNorm = 63.2965, GNorm = 1.1551, lr_0 = 1.4890e-04
Loss = 3.5064e-01, PNorm = 63.2990, GNorm = 1.4684, lr_0 = 1.4880e-04
Loss = 3.4609e-01, PNorm = 63.3012, GNorm = 1.7293, lr_0 = 1.4869e-04
Loss = 3.6409e-01, PNorm = 63.3010, GNorm = 1.4178, lr_0 = 1.4859e-04
Loss = 3.2942e-01, PNorm = 63.3001, GNorm = 1.0304, lr_0 = 1.4849e-04
Loss = 3.5583e-01, PNorm = 63.2999, GNorm = 1.3029, lr_0 = 1.4839e-04
Loss = 3.6853e-01, PNorm = 63.3030, GNorm = 1.7069, lr_0 = 1.4829e-04
Loss = 3.4107e-01, PNorm = 63.3056, GNorm = 1.5772, lr_0 = 1.4818e-04
Loss = 3.2895e-01, PNorm = 63.3069, GNorm = 1.1830, lr_0 = 1.4808e-04
Loss = 2.9851e-01, PNorm = 63.3075, GNorm = 1.4536, lr_0 = 1.4798e-04
Loss = 3.8576e-01, PNorm = 63.3084, GNorm = 1.2526, lr_0 = 1.4788e-04
Loss = 3.7678e-01, PNorm = 63.3099, GNorm = 1.8229, lr_0 = 1.4778e-04
Loss = 3.5168e-01, PNorm = 63.3121, GNorm = 1.6697, lr_0 = 1.4768e-04
Loss = 3.7281e-01, PNorm = 63.3143, GNorm = 1.9066, lr_0 = 1.4758e-04
Loss = 3.7899e-01, PNorm = 63.3148, GNorm = 1.4427, lr_0 = 1.4748e-04
Loss = 3.3651e-01, PNorm = 63.3172, GNorm = 1.0720, lr_0 = 1.4737e-04
Loss = 3.6027e-01, PNorm = 63.3176, GNorm = 1.7702, lr_0 = 1.4727e-04
Loss = 4.0530e-01, PNorm = 63.3191, GNorm = 1.3073, lr_0 = 1.4717e-04
Loss = 3.1057e-01, PNorm = 63.3209, GNorm = 1.3817, lr_0 = 1.4707e-04
Loss = 3.2984e-01, PNorm = 63.3227, GNorm = 1.2949, lr_0 = 1.4697e-04
Loss = 3.4378e-01, PNorm = 63.3255, GNorm = 1.1018, lr_0 = 1.4687e-04
Loss = 3.6700e-01, PNorm = 63.3288, GNorm = 2.0332, lr_0 = 1.4677e-04
Loss = 2.9156e-01, PNorm = 63.3319, GNorm = 1.7293, lr_0 = 1.4667e-04
Loss = 3.4250e-01, PNorm = 63.3329, GNorm = 1.4800, lr_0 = 1.4657e-04
Loss = 3.3888e-01, PNorm = 63.3348, GNorm = 1.5877, lr_0 = 1.4647e-04
Loss = 3.9375e-01, PNorm = 63.3365, GNorm = 1.8185, lr_0 = 1.4637e-04
Loss = 3.7528e-01, PNorm = 63.3377, GNorm = 1.0338, lr_0 = 1.4627e-04
Loss = 3.3846e-01, PNorm = 63.3420, GNorm = 1.6866, lr_0 = 1.4617e-04
Loss = 3.5814e-01, PNorm = 63.3429, GNorm = 1.3116, lr_0 = 1.4607e-04
Loss = 3.5703e-01, PNorm = 63.3426, GNorm = 1.5478, lr_0 = 1.4597e-04
Loss = 3.3285e-01, PNorm = 63.3459, GNorm = 2.2621, lr_0 = 1.4587e-04
Loss = 3.5094e-01, PNorm = 63.3463, GNorm = 2.1873, lr_0 = 1.4577e-04
Loss = 3.4536e-01, PNorm = 63.3456, GNorm = 1.1550, lr_0 = 1.4567e-04
Loss = 3.8040e-01, PNorm = 63.3484, GNorm = 1.0912, lr_0 = 1.4557e-04
Loss = 3.8264e-01, PNorm = 63.3507, GNorm = 1.1409, lr_0 = 1.4547e-04
Loss = 3.1636e-01, PNorm = 63.3509, GNorm = 1.2617, lr_0 = 1.4537e-04
Loss = 3.2766e-01, PNorm = 63.3490, GNorm = 1.2491, lr_0 = 1.4527e-04
Loss = 3.7591e-01, PNorm = 63.3491, GNorm = 1.7247, lr_0 = 1.4517e-04
Loss = 3.6054e-01, PNorm = 63.3492, GNorm = 1.4844, lr_0 = 1.4507e-04
Loss = 3.2179e-01, PNorm = 63.3484, GNorm = 1.9460, lr_0 = 1.4497e-04
Loss = 3.5006e-01, PNorm = 63.3498, GNorm = 1.1088, lr_0 = 1.4487e-04
Loss = 3.5008e-01, PNorm = 63.3526, GNorm = 1.3401, lr_0 = 1.4477e-04
Loss = 3.0698e-01, PNorm = 63.3555, GNorm = 1.4895, lr_0 = 1.4467e-04
Loss = 3.4295e-01, PNorm = 63.3578, GNorm = 1.0444, lr_0 = 1.4457e-04
Loss = 3.5373e-01, PNorm = 63.3595, GNorm = 1.8638, lr_0 = 1.4447e-04
Loss = 3.4280e-01, PNorm = 63.3609, GNorm = 1.0330, lr_0 = 1.4438e-04
Loss = 3.1903e-01, PNorm = 63.3610, GNorm = 1.7042, lr_0 = 1.4428e-04
Loss = 3.8584e-01, PNorm = 63.3614, GNorm = 1.0845, lr_0 = 1.4418e-04
Loss = 3.5390e-01, PNorm = 63.3616, GNorm = 1.2507, lr_0 = 1.4408e-04
Loss = 3.4990e-01, PNorm = 63.3622, GNorm = 1.5298, lr_0 = 1.4398e-04
Loss = 3.5408e-01, PNorm = 63.3628, GNorm = 1.4874, lr_0 = 1.4388e-04
Loss = 3.4742e-01, PNorm = 63.3650, GNorm = 1.8788, lr_0 = 1.4378e-04
Loss = 3.2297e-01, PNorm = 63.3681, GNorm = 1.3142, lr_0 = 1.4368e-04
Loss = 3.0735e-01, PNorm = 63.3697, GNorm = 1.9717, lr_0 = 1.4359e-04
Loss = 3.2685e-01, PNorm = 63.3699, GNorm = 1.4885, lr_0 = 1.4349e-04
Loss = 3.7062e-01, PNorm = 63.3723, GNorm = 1.2795, lr_0 = 1.4339e-04
Loss = 3.1114e-01, PNorm = 63.3745, GNorm = 1.5421, lr_0 = 1.4329e-04
Loss = 3.6739e-01, PNorm = 63.3756, GNorm = 2.3476, lr_0 = 1.4319e-04
Loss = 3.9250e-01, PNorm = 63.3759, GNorm = 1.4800, lr_0 = 1.4310e-04
Loss = 3.1891e-01, PNorm = 63.3764, GNorm = 1.3739, lr_0 = 1.4300e-04
Loss = 3.4186e-01, PNorm = 63.3756, GNorm = 1.3897, lr_0 = 1.4290e-04
Loss = 3.6369e-01, PNorm = 63.3754, GNorm = 1.4732, lr_0 = 1.4280e-04
Loss = 3.9020e-01, PNorm = 63.3767, GNorm = 1.6217, lr_0 = 1.4270e-04
Loss = 3.6295e-01, PNorm = 63.3800, GNorm = 1.3694, lr_0 = 1.4261e-04
Loss = 3.9581e-01, PNorm = 63.3805, GNorm = 1.8884, lr_0 = 1.4251e-04
Loss = 3.5214e-01, PNorm = 63.3810, GNorm = 1.3659, lr_0 = 1.4241e-04
Loss = 3.1599e-01, PNorm = 63.3835, GNorm = 0.9409, lr_0 = 1.4231e-04
Loss = 3.5894e-01, PNorm = 63.3854, GNorm = 1.7084, lr_0 = 1.4222e-04
Loss = 3.5806e-01, PNorm = 63.3868, GNorm = 1.1875, lr_0 = 1.4212e-04
Loss = 3.1943e-01, PNorm = 63.3871, GNorm = 1.3250, lr_0 = 1.4202e-04
Loss = 3.1777e-01, PNorm = 63.3886, GNorm = 1.4916, lr_0 = 1.4192e-04
Loss = 3.4350e-01, PNorm = 63.3900, GNorm = 1.4032, lr_0 = 1.4183e-04
Loss = 3.2755e-01, PNorm = 63.3903, GNorm = 1.5017, lr_0 = 1.4173e-04
Loss = 3.3918e-01, PNorm = 63.3925, GNorm = 1.9790, lr_0 = 1.4163e-04
Loss = 3.9368e-01, PNorm = 63.3929, GNorm = 1.2553, lr_0 = 1.4153e-04
Loss = 3.3246e-01, PNorm = 63.3936, GNorm = 1.4424, lr_0 = 1.4144e-04
Loss = 4.0718e-01, PNorm = 63.3947, GNorm = 1.7119, lr_0 = 1.4134e-04
Loss = 3.2582e-01, PNorm = 63.3945, GNorm = 1.3101, lr_0 = 1.4124e-04
Loss = 3.5903e-01, PNorm = 63.3945, GNorm = 1.6528, lr_0 = 1.4115e-04
Loss = 3.4130e-01, PNorm = 63.3959, GNorm = 1.1821, lr_0 = 1.4105e-04
Loss = 3.9073e-01, PNorm = 63.3974, GNorm = 1.8770, lr_0 = 1.4095e-04
Loss = 4.1021e-01, PNorm = 63.3993, GNorm = 1.6426, lr_0 = 1.4086e-04
Loss = 3.5833e-01, PNorm = 63.3997, GNorm = 2.2528, lr_0 = 1.4076e-04
Loss = 3.5476e-01, PNorm = 63.3990, GNorm = 1.6640, lr_0 = 1.4066e-04
Loss = 3.9212e-01, PNorm = 63.3986, GNorm = 1.4868, lr_0 = 1.4057e-04
Loss = 3.2791e-01, PNorm = 63.3994, GNorm = 1.3410, lr_0 = 1.4047e-04
Loss = 3.4750e-01, PNorm = 63.3998, GNorm = 2.3423, lr_0 = 1.4038e-04
Loss = 3.9301e-01, PNorm = 63.4003, GNorm = 1.4776, lr_0 = 1.4028e-04
Loss = 3.4167e-01, PNorm = 63.4013, GNorm = 1.3840, lr_0 = 1.4018e-04
Loss = 2.9109e-01, PNorm = 63.4031, GNorm = 1.2031, lr_0 = 1.4009e-04
Loss = 3.5815e-01, PNorm = 63.4046, GNorm = 1.7100, lr_0 = 1.3999e-04
Loss = 3.1453e-01, PNorm = 63.4063, GNorm = 1.5564, lr_0 = 1.3990e-04
Loss = 3.3792e-01, PNorm = 63.4081, GNorm = 1.6014, lr_0 = 1.3980e-04
Loss = 3.4809e-01, PNorm = 63.4082, GNorm = 1.2250, lr_0 = 1.3970e-04
Loss = 4.1063e-01, PNorm = 63.4100, GNorm = 1.5950, lr_0 = 1.3961e-04
Loss = 3.6782e-01, PNorm = 63.4111, GNorm = 1.8147, lr_0 = 1.3951e-04
Loss = 3.9627e-01, PNorm = 63.4142, GNorm = 1.6886, lr_0 = 1.3942e-04
Loss = 3.8683e-01, PNorm = 63.4157, GNorm = 1.4201, lr_0 = 1.3932e-04
Loss = 3.7119e-01, PNorm = 63.4170, GNorm = 1.7174, lr_0 = 1.3923e-04
Loss = 3.7812e-01, PNorm = 63.4180, GNorm = 2.3252, lr_0 = 1.3913e-04
Loss = 3.2702e-01, PNorm = 63.4195, GNorm = 1.8458, lr_0 = 1.3904e-04
Loss = 3.4689e-01, PNorm = 63.4200, GNorm = 1.1553, lr_0 = 1.3894e-04
Validation mae = 0.111414
Epoch 26
Loss = 3.5007e-01, PNorm = 63.4212, GNorm = 1.5202, lr_0 = 1.3884e-04
Loss = 3.1434e-01, PNorm = 63.4240, GNorm = 1.0231, lr_0 = 1.3875e-04
Loss = 3.4460e-01, PNorm = 63.4248, GNorm = 1.0852, lr_0 = 1.3865e-04
Loss = 3.5565e-01, PNorm = 63.4253, GNorm = 1.3358, lr_0 = 1.3856e-04
Loss = 4.2263e-01, PNorm = 63.4269, GNorm = 2.0355, lr_0 = 1.3846e-04
Loss = 3.4710e-01, PNorm = 63.4266, GNorm = 1.4676, lr_0 = 1.3837e-04
Loss = 3.1712e-01, PNorm = 63.4273, GNorm = 1.3735, lr_0 = 1.3828e-04
Loss = 3.2834e-01, PNorm = 63.4280, GNorm = 1.7978, lr_0 = 1.3818e-04
Loss = 3.3325e-01, PNorm = 63.4293, GNorm = 1.6675, lr_0 = 1.3809e-04
Loss = 3.6436e-01, PNorm = 63.4300, GNorm = 1.5617, lr_0 = 1.3799e-04
Loss = 3.5980e-01, PNorm = 63.4310, GNorm = 1.6551, lr_0 = 1.3790e-04
Loss = 3.2439e-01, PNorm = 63.4329, GNorm = 1.5255, lr_0 = 1.3780e-04
Loss = 3.5621e-01, PNorm = 63.4317, GNorm = 1.1866, lr_0 = 1.3771e-04
Loss = 3.5071e-01, PNorm = 63.4334, GNorm = 1.6411, lr_0 = 1.3761e-04
Loss = 3.2704e-01, PNorm = 63.4383, GNorm = 1.8254, lr_0 = 1.3752e-04
Loss = 3.0438e-01, PNorm = 63.4380, GNorm = 1.2671, lr_0 = 1.3742e-04
Loss = 3.5929e-01, PNorm = 63.4378, GNorm = 1.9131, lr_0 = 1.3733e-04
Loss = 3.1971e-01, PNorm = 63.4397, GNorm = 1.2348, lr_0 = 1.3724e-04
Loss = 3.5058e-01, PNorm = 63.4412, GNorm = 1.0626, lr_0 = 1.3714e-04
Loss = 3.1752e-01, PNorm = 63.4436, GNorm = 1.5792, lr_0 = 1.3705e-04
Loss = 3.9776e-01, PNorm = 63.4449, GNorm = 1.2982, lr_0 = 1.3695e-04
Loss = 3.3347e-01, PNorm = 63.4442, GNorm = 2.0486, lr_0 = 1.3686e-04
Loss = 3.3411e-01, PNorm = 63.4442, GNorm = 1.6316, lr_0 = 1.3677e-04
Loss = 3.9949e-01, PNorm = 63.4447, GNorm = 1.5239, lr_0 = 1.3667e-04
Loss = 3.4923e-01, PNorm = 63.4477, GNorm = 1.3819, lr_0 = 1.3658e-04
Loss = 3.4406e-01, PNorm = 63.4490, GNorm = 1.2921, lr_0 = 1.3649e-04
Loss = 4.1440e-01, PNorm = 63.4497, GNorm = 1.3399, lr_0 = 1.3639e-04
Loss = 3.9596e-01, PNorm = 63.4485, GNorm = 1.4018, lr_0 = 1.3630e-04
Loss = 3.4126e-01, PNorm = 63.4495, GNorm = 1.3085, lr_0 = 1.3621e-04
Loss = 3.5942e-01, PNorm = 63.4533, GNorm = 1.4714, lr_0 = 1.3611e-04
Loss = 3.2191e-01, PNorm = 63.4553, GNorm = 1.5575, lr_0 = 1.3602e-04
Loss = 3.8655e-01, PNorm = 63.4563, GNorm = 1.4837, lr_0 = 1.3593e-04
Loss = 3.6256e-01, PNorm = 63.4591, GNorm = 1.2645, lr_0 = 1.3583e-04
Loss = 3.2521e-01, PNorm = 63.4615, GNorm = 1.4508, lr_0 = 1.3574e-04
Loss = 3.5571e-01, PNorm = 63.4619, GNorm = 1.7919, lr_0 = 1.3565e-04
Loss = 3.1504e-01, PNorm = 63.4619, GNorm = 1.0319, lr_0 = 1.3555e-04
Loss = 3.4421e-01, PNorm = 63.4626, GNorm = 1.2772, lr_0 = 1.3546e-04
Loss = 2.8943e-01, PNorm = 63.4634, GNorm = 1.1442, lr_0 = 1.3537e-04
Loss = 3.6640e-01, PNorm = 63.4632, GNorm = 1.5978, lr_0 = 1.3528e-04
Loss = 3.3235e-01, PNorm = 63.4641, GNorm = 1.0804, lr_0 = 1.3518e-04
Loss = 3.6110e-01, PNorm = 63.4653, GNorm = 1.7579, lr_0 = 1.3509e-04
Loss = 3.6724e-01, PNorm = 63.4658, GNorm = 1.5119, lr_0 = 1.3500e-04
Loss = 3.2845e-01, PNorm = 63.4656, GNorm = 1.4797, lr_0 = 1.3491e-04
Loss = 3.9115e-01, PNorm = 63.4689, GNorm = 1.7562, lr_0 = 1.3481e-04
Loss = 3.2413e-01, PNorm = 63.4714, GNorm = 1.4464, lr_0 = 1.3472e-04
Loss = 3.7351e-01, PNorm = 63.4709, GNorm = 2.1650, lr_0 = 1.3463e-04
Loss = 3.4706e-01, PNorm = 63.4695, GNorm = 1.3877, lr_0 = 1.3454e-04
Loss = 3.5908e-01, PNorm = 63.4687, GNorm = 1.3445, lr_0 = 1.3444e-04
Loss = 3.6232e-01, PNorm = 63.4694, GNorm = 2.1685, lr_0 = 1.3435e-04
Loss = 3.6581e-01, PNorm = 63.4697, GNorm = 1.6488, lr_0 = 1.3426e-04
Loss = 3.8536e-01, PNorm = 63.4699, GNorm = 1.8148, lr_0 = 1.3417e-04
Loss = 3.6451e-01, PNorm = 63.4719, GNorm = 1.6967, lr_0 = 1.3408e-04
Loss = 3.8313e-01, PNorm = 63.4749, GNorm = 1.6619, lr_0 = 1.3398e-04
Loss = 4.0459e-01, PNorm = 63.4780, GNorm = 1.6987, lr_0 = 1.3389e-04
Loss = 3.2913e-01, PNorm = 63.4804, GNorm = 1.3894, lr_0 = 1.3380e-04
Loss = 3.0258e-01, PNorm = 63.4832, GNorm = 1.6447, lr_0 = 1.3371e-04
Loss = 3.3926e-01, PNorm = 63.4835, GNorm = 1.0773, lr_0 = 1.3362e-04
Loss = 3.6779e-01, PNorm = 63.4848, GNorm = 1.7895, lr_0 = 1.3353e-04
Loss = 3.2597e-01, PNorm = 63.4877, GNorm = 1.3955, lr_0 = 1.3343e-04
Loss = 3.1774e-01, PNorm = 63.4909, GNorm = 1.2989, lr_0 = 1.3334e-04
Loss = 3.4325e-01, PNorm = 63.4909, GNorm = 1.7894, lr_0 = 1.3325e-04
Loss = 3.4394e-01, PNorm = 63.4928, GNorm = 1.2685, lr_0 = 1.3316e-04
Loss = 3.2235e-01, PNorm = 63.4938, GNorm = 1.3799, lr_0 = 1.3307e-04
Loss = 3.2945e-01, PNorm = 63.4938, GNorm = 1.5598, lr_0 = 1.3298e-04
Loss = 3.3868e-01, PNorm = 63.4959, GNorm = 1.3700, lr_0 = 1.3289e-04
Loss = 3.2534e-01, PNorm = 63.4968, GNorm = 1.5702, lr_0 = 1.3280e-04
Loss = 3.6955e-01, PNorm = 63.4955, GNorm = 1.3596, lr_0 = 1.3270e-04
Loss = 3.5015e-01, PNorm = 63.4968, GNorm = 1.2821, lr_0 = 1.3261e-04
Loss = 3.5984e-01, PNorm = 63.4992, GNorm = 1.4265, lr_0 = 1.3252e-04
Loss = 3.6330e-01, PNorm = 63.5007, GNorm = 1.3684, lr_0 = 1.3243e-04
Loss = 3.7447e-01, PNorm = 63.5025, GNorm = 1.3966, lr_0 = 1.3234e-04
Loss = 3.4677e-01, PNorm = 63.5021, GNorm = 1.4776, lr_0 = 1.3225e-04
Loss = 3.6099e-01, PNorm = 63.5034, GNorm = 1.1926, lr_0 = 1.3216e-04
Loss = 3.3425e-01, PNorm = 63.5042, GNorm = 1.2429, lr_0 = 1.3207e-04
Loss = 2.8049e-01, PNorm = 63.5050, GNorm = 1.4269, lr_0 = 1.3198e-04
Loss = 3.1450e-01, PNorm = 63.5065, GNorm = 1.8285, lr_0 = 1.3189e-04
Loss = 4.0536e-01, PNorm = 63.5070, GNorm = 1.5714, lr_0 = 1.3180e-04
Loss = 3.5556e-01, PNorm = 63.5082, GNorm = 1.4875, lr_0 = 1.3171e-04
Loss = 4.0958e-01, PNorm = 63.5120, GNorm = 2.7952, lr_0 = 1.3162e-04
Loss = 3.2881e-01, PNorm = 63.5119, GNorm = 2.0048, lr_0 = 1.3153e-04
Loss = 3.4141e-01, PNorm = 63.5119, GNorm = 1.6957, lr_0 = 1.3144e-04
Loss = 3.7146e-01, PNorm = 63.5122, GNorm = 1.3954, lr_0 = 1.3135e-04
Loss = 3.3600e-01, PNorm = 63.5129, GNorm = 1.5825, lr_0 = 1.3126e-04
Loss = 4.0541e-01, PNorm = 63.5146, GNorm = 2.4121, lr_0 = 1.3117e-04
Loss = 3.3431e-01, PNorm = 63.5155, GNorm = 1.4163, lr_0 = 1.3108e-04
Loss = 3.9514e-01, PNorm = 63.5155, GNorm = 1.2701, lr_0 = 1.3099e-04
Loss = 3.5491e-01, PNorm = 63.5157, GNorm = 2.2210, lr_0 = 1.3090e-04
Loss = 2.9655e-01, PNorm = 63.5164, GNorm = 2.1386, lr_0 = 1.3081e-04
Loss = 3.9386e-01, PNorm = 63.5157, GNorm = 1.6565, lr_0 = 1.3072e-04
Loss = 3.4027e-01, PNorm = 63.5173, GNorm = 0.9798, lr_0 = 1.3063e-04
Loss = 3.4155e-01, PNorm = 63.5189, GNorm = 1.1040, lr_0 = 1.3054e-04
Loss = 3.3130e-01, PNorm = 63.5200, GNorm = 1.1611, lr_0 = 1.3045e-04
Loss = 3.7260e-01, PNorm = 63.5207, GNorm = 1.5810, lr_0 = 1.3036e-04
Loss = 3.6734e-01, PNorm = 63.5228, GNorm = 1.5931, lr_0 = 1.3027e-04
Loss = 3.1610e-01, PNorm = 63.5251, GNorm = 1.5713, lr_0 = 1.3018e-04
Loss = 3.4715e-01, PNorm = 63.5254, GNorm = 1.4857, lr_0 = 1.3009e-04
Loss = 3.2512e-01, PNorm = 63.5264, GNorm = 2.2730, lr_0 = 1.3000e-04
Loss = 3.6685e-01, PNorm = 63.5282, GNorm = 1.4587, lr_0 = 1.2992e-04
Loss = 3.3244e-01, PNorm = 63.5294, GNorm = 1.2563, lr_0 = 1.2983e-04
Loss = 3.6835e-01, PNorm = 63.5301, GNorm = 1.7406, lr_0 = 1.2974e-04
Loss = 3.2517e-01, PNorm = 63.5319, GNorm = 1.1701, lr_0 = 1.2965e-04
Loss = 3.4089e-01, PNorm = 63.5332, GNorm = 1.2737, lr_0 = 1.2956e-04
Loss = 3.3212e-01, PNorm = 63.5343, GNorm = 1.3718, lr_0 = 1.2947e-04
Loss = 3.3582e-01, PNorm = 63.5368, GNorm = 1.3888, lr_0 = 1.2938e-04
Loss = 3.4210e-01, PNorm = 63.5385, GNorm = 1.3609, lr_0 = 1.2929e-04
Loss = 3.3563e-01, PNorm = 63.5412, GNorm = 1.6201, lr_0 = 1.2921e-04
Loss = 3.5733e-01, PNorm = 63.5427, GNorm = 1.3082, lr_0 = 1.2912e-04
Loss = 2.9296e-01, PNorm = 63.5427, GNorm = 1.0275, lr_0 = 1.2903e-04
Loss = 3.0433e-01, PNorm = 63.5442, GNorm = 2.2837, lr_0 = 1.2894e-04
Loss = 3.5163e-01, PNorm = 63.5442, GNorm = 1.5167, lr_0 = 1.2885e-04
Loss = 3.4331e-01, PNorm = 63.5450, GNorm = 1.3507, lr_0 = 1.2876e-04
Loss = 3.4750e-01, PNorm = 63.5446, GNorm = 1.5532, lr_0 = 1.2867e-04
Loss = 3.1978e-01, PNorm = 63.5451, GNorm = 1.2579, lr_0 = 1.2859e-04
Loss = 3.5192e-01, PNorm = 63.5475, GNorm = 1.3169, lr_0 = 1.2850e-04
Loss = 3.5987e-01, PNorm = 63.5485, GNorm = 1.4025, lr_0 = 1.2841e-04
Loss = 3.5225e-01, PNorm = 63.5499, GNorm = 1.7399, lr_0 = 1.2832e-04
Loss = 3.3236e-01, PNorm = 63.5517, GNorm = 1.4805, lr_0 = 1.2823e-04
Loss = 3.2290e-01, PNorm = 63.5523, GNorm = 1.5343, lr_0 = 1.2815e-04
Loss = 3.3392e-01, PNorm = 63.5523, GNorm = 1.1436, lr_0 = 1.2806e-04
Loss = 3.3782e-01, PNorm = 63.5520, GNorm = 1.1898, lr_0 = 1.2797e-04
Validation mae = 0.112223
Epoch 27
Loss = 2.9931e-01, PNorm = 63.5542, GNorm = 1.2891, lr_0 = 1.2788e-04
Loss = 3.4407e-01, PNorm = 63.5554, GNorm = 1.1509, lr_0 = 1.2780e-04
Loss = 3.4677e-01, PNorm = 63.5573, GNorm = 1.8036, lr_0 = 1.2771e-04
Loss = 3.4930e-01, PNorm = 63.5597, GNorm = 2.0660, lr_0 = 1.2762e-04
Loss = 3.0947e-01, PNorm = 63.5610, GNorm = 1.8669, lr_0 = 1.2753e-04
Loss = 3.1332e-01, PNorm = 63.5619, GNorm = 1.5223, lr_0 = 1.2745e-04
Loss = 3.4662e-01, PNorm = 63.5629, GNorm = 1.2345, lr_0 = 1.2736e-04
Loss = 3.6059e-01, PNorm = 63.5643, GNorm = 1.3748, lr_0 = 1.2727e-04
Loss = 3.6243e-01, PNorm = 63.5647, GNorm = 1.0893, lr_0 = 1.2718e-04
Loss = 3.4594e-01, PNorm = 63.5636, GNorm = 2.4312, lr_0 = 1.2710e-04
Loss = 3.0615e-01, PNorm = 63.5647, GNorm = 1.3394, lr_0 = 1.2701e-04
Loss = 2.7984e-01, PNorm = 63.5668, GNorm = 1.5917, lr_0 = 1.2692e-04
Loss = 3.6556e-01, PNorm = 63.5674, GNorm = 1.6833, lr_0 = 1.2684e-04
Loss = 3.5634e-01, PNorm = 63.5675, GNorm = 2.4523, lr_0 = 1.2675e-04
Loss = 3.0866e-01, PNorm = 63.5698, GNorm = 1.5373, lr_0 = 1.2666e-04
Loss = 3.3792e-01, PNorm = 63.5702, GNorm = 1.6949, lr_0 = 1.2658e-04
Loss = 3.1589e-01, PNorm = 63.5722, GNorm = 2.0688, lr_0 = 1.2649e-04
Loss = 3.5177e-01, PNorm = 63.5720, GNorm = 1.7699, lr_0 = 1.2640e-04
Loss = 4.0732e-01, PNorm = 63.5712, GNorm = 1.6521, lr_0 = 1.2632e-04
Loss = 3.1888e-01, PNorm = 63.5706, GNorm = 1.7143, lr_0 = 1.2623e-04
Loss = 3.3198e-01, PNorm = 63.5694, GNorm = 1.0869, lr_0 = 1.2614e-04
Loss = 3.9272e-01, PNorm = 63.5715, GNorm = 1.3467, lr_0 = 1.2606e-04
Loss = 3.7421e-01, PNorm = 63.5727, GNorm = 1.9154, lr_0 = 1.2597e-04
Loss = 3.5989e-01, PNorm = 63.5753, GNorm = 1.3915, lr_0 = 1.2588e-04
Loss = 3.9016e-01, PNorm = 63.5773, GNorm = 1.9797, lr_0 = 1.2580e-04
Loss = 3.7840e-01, PNorm = 63.5794, GNorm = 1.1033, lr_0 = 1.2571e-04
Loss = 3.4558e-01, PNorm = 63.5817, GNorm = 1.7806, lr_0 = 1.2563e-04
Loss = 3.5891e-01, PNorm = 63.5830, GNorm = 1.4270, lr_0 = 1.2554e-04
Loss = 3.6419e-01, PNorm = 63.5828, GNorm = 1.1313, lr_0 = 1.2545e-04
Loss = 3.6256e-01, PNorm = 63.5830, GNorm = 1.0666, lr_0 = 1.2537e-04
Loss = 3.5770e-01, PNorm = 63.5842, GNorm = 1.4305, lr_0 = 1.2528e-04
Loss = 3.3059e-01, PNorm = 63.5865, GNorm = 1.5226, lr_0 = 1.2520e-04
Loss = 2.9545e-01, PNorm = 63.5862, GNorm = 0.9880, lr_0 = 1.2511e-04
Loss = 3.0742e-01, PNorm = 63.5863, GNorm = 1.1262, lr_0 = 1.2502e-04
Loss = 4.6183e-01, PNorm = 63.5877, GNorm = 1.7368, lr_0 = 1.2494e-04
Loss = 3.2407e-01, PNorm = 63.5892, GNorm = 1.2371, lr_0 = 1.2485e-04
Loss = 3.3181e-01, PNorm = 63.5899, GNorm = 1.8414, lr_0 = 1.2477e-04
Loss = 3.3335e-01, PNorm = 63.5923, GNorm = 1.6713, lr_0 = 1.2468e-04
Loss = 3.3479e-01, PNorm = 63.5939, GNorm = 1.7134, lr_0 = 1.2460e-04
Loss = 3.7405e-01, PNorm = 63.5948, GNorm = 1.7117, lr_0 = 1.2451e-04
Loss = 3.5373e-01, PNorm = 63.5960, GNorm = 1.4529, lr_0 = 1.2443e-04
Loss = 3.3230e-01, PNorm = 63.5974, GNorm = 1.5344, lr_0 = 1.2434e-04
Loss = 3.2663e-01, PNorm = 63.5986, GNorm = 1.8053, lr_0 = 1.2426e-04
Loss = 3.0961e-01, PNorm = 63.6000, GNorm = 1.5050, lr_0 = 1.2417e-04
Loss = 3.3196e-01, PNorm = 63.6006, GNorm = 1.3418, lr_0 = 1.2409e-04
Loss = 3.5702e-01, PNorm = 63.6021, GNorm = 1.9161, lr_0 = 1.2400e-04
Loss = 3.1680e-01, PNorm = 63.6010, GNorm = 1.5595, lr_0 = 1.2392e-04
Loss = 3.5152e-01, PNorm = 63.6007, GNorm = 1.3908, lr_0 = 1.2383e-04
Loss = 3.3592e-01, PNorm = 63.6001, GNorm = 1.4067, lr_0 = 1.2375e-04
Loss = 3.3395e-01, PNorm = 63.6014, GNorm = 1.5912, lr_0 = 1.2366e-04
Loss = 3.4308e-01, PNorm = 63.6014, GNorm = 1.0707, lr_0 = 1.2358e-04
Loss = 3.5330e-01, PNorm = 63.6014, GNorm = 1.9788, lr_0 = 1.2349e-04
Loss = 3.2458e-01, PNorm = 63.6015, GNorm = 2.1880, lr_0 = 1.2341e-04
Loss = 3.7693e-01, PNorm = 63.6028, GNorm = 1.6174, lr_0 = 1.2332e-04
Loss = 3.4803e-01, PNorm = 63.6036, GNorm = 1.6030, lr_0 = 1.2324e-04
Loss = 3.3226e-01, PNorm = 63.6047, GNorm = 1.2835, lr_0 = 1.2315e-04
Loss = 3.7099e-01, PNorm = 63.6059, GNorm = 1.3972, lr_0 = 1.2307e-04
Loss = 3.6814e-01, PNorm = 63.6074, GNorm = 1.6853, lr_0 = 1.2298e-04
Loss = 3.8039e-01, PNorm = 63.6073, GNorm = 1.5235, lr_0 = 1.2290e-04
Loss = 3.4563e-01, PNorm = 63.6067, GNorm = 1.7243, lr_0 = 1.2282e-04
Loss = 3.2440e-01, PNorm = 63.6079, GNorm = 1.4789, lr_0 = 1.2273e-04
Loss = 3.5006e-01, PNorm = 63.6083, GNorm = 2.0199, lr_0 = 1.2265e-04
Loss = 3.5022e-01, PNorm = 63.6082, GNorm = 1.7535, lr_0 = 1.2256e-04
Loss = 3.0484e-01, PNorm = 63.6094, GNorm = 1.5388, lr_0 = 1.2248e-04
Loss = 3.8004e-01, PNorm = 63.6100, GNorm = 2.5934, lr_0 = 1.2240e-04
Loss = 4.1215e-01, PNorm = 63.6124, GNorm = 1.2789, lr_0 = 1.2231e-04
Loss = 3.3769e-01, PNorm = 63.6130, GNorm = 1.4137, lr_0 = 1.2223e-04
Loss = 3.4296e-01, PNorm = 63.6142, GNorm = 1.6094, lr_0 = 1.2214e-04
Loss = 3.3030e-01, PNorm = 63.6159, GNorm = 1.2042, lr_0 = 1.2206e-04
Loss = 2.9765e-01, PNorm = 63.6168, GNorm = 0.9614, lr_0 = 1.2198e-04
Loss = 3.4337e-01, PNorm = 63.6181, GNorm = 1.6648, lr_0 = 1.2189e-04
Loss = 3.7032e-01, PNorm = 63.6206, GNorm = 1.1585, lr_0 = 1.2181e-04
Loss = 3.4804e-01, PNorm = 63.6233, GNorm = 1.6975, lr_0 = 1.2173e-04
Loss = 3.4248e-01, PNorm = 63.6257, GNorm = 1.3960, lr_0 = 1.2164e-04
Loss = 3.6951e-01, PNorm = 63.6270, GNorm = 1.1881, lr_0 = 1.2156e-04
Loss = 3.4064e-01, PNorm = 63.6279, GNorm = 1.4637, lr_0 = 1.2148e-04
Loss = 3.3702e-01, PNorm = 63.6310, GNorm = 1.4242, lr_0 = 1.2139e-04
Loss = 3.5619e-01, PNorm = 63.6329, GNorm = 1.5940, lr_0 = 1.2131e-04
Loss = 3.0552e-01, PNorm = 63.6327, GNorm = 2.3937, lr_0 = 1.2123e-04
Loss = 3.3683e-01, PNorm = 63.6352, GNorm = 1.7830, lr_0 = 1.2114e-04
Loss = 3.5954e-01, PNorm = 63.6368, GNorm = 1.3417, lr_0 = 1.2106e-04
Loss = 3.7761e-01, PNorm = 63.6369, GNorm = 1.3627, lr_0 = 1.2098e-04
Loss = 3.4309e-01, PNorm = 63.6384, GNorm = 1.2367, lr_0 = 1.2090e-04
Loss = 3.7272e-01, PNorm = 63.6394, GNorm = 1.9279, lr_0 = 1.2081e-04
Loss = 3.0063e-01, PNorm = 63.6398, GNorm = 1.8383, lr_0 = 1.2073e-04
Loss = 3.2001e-01, PNorm = 63.6404, GNorm = 1.7787, lr_0 = 1.2065e-04
Loss = 3.6849e-01, PNorm = 63.6397, GNorm = 1.1566, lr_0 = 1.2056e-04
Loss = 3.5177e-01, PNorm = 63.6397, GNorm = 1.5673, lr_0 = 1.2048e-04
Loss = 3.7295e-01, PNorm = 63.6412, GNorm = 2.1577, lr_0 = 1.2040e-04
Loss = 3.8283e-01, PNorm = 63.6431, GNorm = 1.6142, lr_0 = 1.2032e-04
Loss = 3.4487e-01, PNorm = 63.6453, GNorm = 1.3836, lr_0 = 1.2023e-04
Loss = 3.9818e-01, PNorm = 63.6467, GNorm = 1.7063, lr_0 = 1.2015e-04
Loss = 3.6946e-01, PNorm = 63.6473, GNorm = 1.4802, lr_0 = 1.2007e-04
Loss = 3.2281e-01, PNorm = 63.6473, GNorm = 1.4369, lr_0 = 1.1999e-04
Loss = 3.1599e-01, PNorm = 63.6478, GNorm = 1.6826, lr_0 = 1.1991e-04
Loss = 3.3982e-01, PNorm = 63.6486, GNorm = 1.2778, lr_0 = 1.1982e-04
Loss = 3.4786e-01, PNorm = 63.6489, GNorm = 1.5310, lr_0 = 1.1974e-04
Loss = 3.0493e-01, PNorm = 63.6520, GNorm = 1.4557, lr_0 = 1.1966e-04
Loss = 3.1621e-01, PNorm = 63.6537, GNorm = 1.6721, lr_0 = 1.1958e-04
Loss = 4.0390e-01, PNorm = 63.6551, GNorm = 1.5694, lr_0 = 1.1950e-04
Loss = 3.4565e-01, PNorm = 63.6583, GNorm = 1.3156, lr_0 = 1.1941e-04
Loss = 3.4908e-01, PNorm = 63.6612, GNorm = 1.5813, lr_0 = 1.1933e-04
Loss = 3.5139e-01, PNorm = 63.6630, GNorm = 1.8198, lr_0 = 1.1925e-04
Loss = 3.4155e-01, PNorm = 63.6628, GNorm = 1.7271, lr_0 = 1.1917e-04
Loss = 3.4781e-01, PNorm = 63.6622, GNorm = 1.7111, lr_0 = 1.1909e-04
Loss = 3.2078e-01, PNorm = 63.6627, GNorm = 1.6592, lr_0 = 1.1901e-04
Loss = 3.3011e-01, PNorm = 63.6618, GNorm = 1.5171, lr_0 = 1.1892e-04
Loss = 3.2305e-01, PNorm = 63.6623, GNorm = 1.3048, lr_0 = 1.1884e-04
Loss = 3.4200e-01, PNorm = 63.6634, GNorm = 1.5664, lr_0 = 1.1876e-04
Loss = 3.1257e-01, PNorm = 63.6652, GNorm = 1.6019, lr_0 = 1.1868e-04
Loss = 3.1224e-01, PNorm = 63.6669, GNorm = 1.7511, lr_0 = 1.1860e-04
Loss = 3.0902e-01, PNorm = 63.6676, GNorm = 2.5316, lr_0 = 1.1852e-04
Loss = 3.5354e-01, PNorm = 63.6675, GNorm = 1.2738, lr_0 = 1.1844e-04
Loss = 3.3464e-01, PNorm = 63.6676, GNorm = 1.5901, lr_0 = 1.1835e-04
Loss = 3.4166e-01, PNorm = 63.6685, GNorm = 1.7058, lr_0 = 1.1827e-04
Loss = 3.4408e-01, PNorm = 63.6679, GNorm = 1.9037, lr_0 = 1.1819e-04
Loss = 3.2321e-01, PNorm = 63.6685, GNorm = 1.2663, lr_0 = 1.1811e-04
Loss = 3.9970e-01, PNorm = 63.6688, GNorm = 1.9768, lr_0 = 1.1803e-04
Loss = 2.9946e-01, PNorm = 63.6721, GNorm = 1.0073, lr_0 = 1.1795e-04
Loss = 3.9537e-01, PNorm = 63.6751, GNorm = 1.4630, lr_0 = 1.1787e-04
Validation mae = 0.111132
Epoch 28
Loss = 3.1422e-01, PNorm = 63.6751, GNorm = 1.4499, lr_0 = 1.1779e-04
Loss = 3.3279e-01, PNorm = 63.6759, GNorm = 1.3523, lr_0 = 1.1771e-04
Loss = 3.7506e-01, PNorm = 63.6770, GNorm = 1.2279, lr_0 = 1.1763e-04
Loss = 3.3878e-01, PNorm = 63.6799, GNorm = 1.0063, lr_0 = 1.1755e-04
Loss = 3.3004e-01, PNorm = 63.6817, GNorm = 1.7999, lr_0 = 1.1747e-04
Loss = 3.0211e-01, PNorm = 63.6827, GNorm = 1.5028, lr_0 = 1.1739e-04
Loss = 3.4841e-01, PNorm = 63.6831, GNorm = 2.0078, lr_0 = 1.1730e-04
Loss = 3.2987e-01, PNorm = 63.6837, GNorm = 1.1532, lr_0 = 1.1722e-04
Loss = 3.1525e-01, PNorm = 63.6851, GNorm = 1.4863, lr_0 = 1.1714e-04
Loss = 3.3155e-01, PNorm = 63.6881, GNorm = 1.0270, lr_0 = 1.1706e-04
Loss = 2.8826e-01, PNorm = 63.6887, GNorm = 1.1218, lr_0 = 1.1698e-04
Loss = 3.5177e-01, PNorm = 63.6887, GNorm = 1.3084, lr_0 = 1.1690e-04
Loss = 3.4976e-01, PNorm = 63.6888, GNorm = 1.6559, lr_0 = 1.1682e-04
Loss = 3.5193e-01, PNorm = 63.6887, GNorm = 1.1677, lr_0 = 1.1674e-04
Loss = 3.7263e-01, PNorm = 63.6890, GNorm = 2.0898, lr_0 = 1.1666e-04
Loss = 3.2840e-01, PNorm = 63.6895, GNorm = 1.5944, lr_0 = 1.1658e-04
Loss = 3.8468e-01, PNorm = 63.6895, GNorm = 2.1280, lr_0 = 1.1650e-04
Loss = 3.3375e-01, PNorm = 63.6911, GNorm = 1.4774, lr_0 = 1.1642e-04
Loss = 3.5318e-01, PNorm = 63.6926, GNorm = 1.7207, lr_0 = 1.1634e-04
Loss = 3.2192e-01, PNorm = 63.6933, GNorm = 1.5605, lr_0 = 1.1626e-04
Loss = 3.1966e-01, PNorm = 63.6929, GNorm = 1.7666, lr_0 = 1.1618e-04
Loss = 3.8269e-01, PNorm = 63.6939, GNorm = 2.2512, lr_0 = 1.1611e-04
Loss = 3.7663e-01, PNorm = 63.6945, GNorm = 1.5170, lr_0 = 1.1603e-04
Loss = 3.6401e-01, PNorm = 63.6968, GNorm = 1.0358, lr_0 = 1.1595e-04
Loss = 2.9369e-01, PNorm = 63.6979, GNorm = 1.8742, lr_0 = 1.1587e-04
Loss = 3.4586e-01, PNorm = 63.6997, GNorm = 1.4876, lr_0 = 1.1579e-04
Loss = 3.6429e-01, PNorm = 63.7015, GNorm = 1.5815, lr_0 = 1.1571e-04
Loss = 3.2202e-01, PNorm = 63.7010, GNorm = 1.6740, lr_0 = 1.1563e-04
Loss = 3.4829e-01, PNorm = 63.7009, GNorm = 1.2849, lr_0 = 1.1555e-04
Loss = 3.2142e-01, PNorm = 63.7024, GNorm = 1.1658, lr_0 = 1.1547e-04
Loss = 3.3788e-01, PNorm = 63.7015, GNorm = 1.4593, lr_0 = 1.1539e-04
Loss = 3.6286e-01, PNorm = 63.7026, GNorm = 2.1940, lr_0 = 1.1531e-04
Loss = 3.2522e-01, PNorm = 63.7039, GNorm = 1.6322, lr_0 = 1.1523e-04
Loss = 3.2775e-01, PNorm = 63.7048, GNorm = 1.8334, lr_0 = 1.1515e-04
Loss = 3.7403e-01, PNorm = 63.7053, GNorm = 1.7466, lr_0 = 1.1508e-04
Loss = 3.4279e-01, PNorm = 63.7063, GNorm = 1.3836, lr_0 = 1.1500e-04
Loss = 3.6365e-01, PNorm = 63.7074, GNorm = 1.3479, lr_0 = 1.1492e-04
Loss = 3.3591e-01, PNorm = 63.7092, GNorm = 1.7595, lr_0 = 1.1484e-04
Loss = 3.2624e-01, PNorm = 63.7110, GNorm = 1.4366, lr_0 = 1.1476e-04
Loss = 3.4844e-01, PNorm = 63.7115, GNorm = 1.3253, lr_0 = 1.1468e-04
Loss = 3.3645e-01, PNorm = 63.7129, GNorm = 1.2592, lr_0 = 1.1460e-04
Loss = 3.4115e-01, PNorm = 63.7125, GNorm = 1.2007, lr_0 = 1.1452e-04
Loss = 3.9569e-01, PNorm = 63.7122, GNorm = 1.2492, lr_0 = 1.1445e-04
Loss = 3.5828e-01, PNorm = 63.7132, GNorm = 1.9256, lr_0 = 1.1437e-04
Loss = 3.0636e-01, PNorm = 63.7127, GNorm = 1.5190, lr_0 = 1.1429e-04
Loss = 3.5432e-01, PNorm = 63.7117, GNorm = 1.8111, lr_0 = 1.1421e-04
Loss = 3.1606e-01, PNorm = 63.7133, GNorm = 1.1539, lr_0 = 1.1413e-04
Loss = 3.4370e-01, PNorm = 63.7149, GNorm = 1.2222, lr_0 = 1.1405e-04
Loss = 3.5876e-01, PNorm = 63.7166, GNorm = 1.0634, lr_0 = 1.1398e-04
Loss = 3.6669e-01, PNorm = 63.7190, GNorm = 1.3320, lr_0 = 1.1390e-04
Loss = 3.0374e-01, PNorm = 63.7204, GNorm = 1.1388, lr_0 = 1.1382e-04
Loss = 4.1029e-01, PNorm = 63.7199, GNorm = 1.3963, lr_0 = 1.1374e-04
Loss = 3.2959e-01, PNorm = 63.7205, GNorm = 1.6196, lr_0 = 1.1366e-04
Loss = 3.0558e-01, PNorm = 63.7234, GNorm = 1.4125, lr_0 = 1.1359e-04
Loss = 3.4514e-01, PNorm = 63.7237, GNorm = 1.5995, lr_0 = 1.1351e-04
Loss = 3.3277e-01, PNorm = 63.7228, GNorm = 1.3608, lr_0 = 1.1343e-04
Loss = 3.5263e-01, PNorm = 63.7234, GNorm = 1.5481, lr_0 = 1.1335e-04
Loss = 3.5183e-01, PNorm = 63.7246, GNorm = 1.4084, lr_0 = 1.1328e-04
Loss = 3.5296e-01, PNorm = 63.7258, GNorm = 1.5123, lr_0 = 1.1320e-04
Loss = 3.6197e-01, PNorm = 63.7268, GNorm = 1.3903, lr_0 = 1.1312e-04
Loss = 3.8808e-01, PNorm = 63.7291, GNorm = 1.4431, lr_0 = 1.1304e-04
Loss = 3.7140e-01, PNorm = 63.7312, GNorm = 1.5163, lr_0 = 1.1297e-04
Loss = 3.5039e-01, PNorm = 63.7316, GNorm = 2.0024, lr_0 = 1.1289e-04
Loss = 3.6460e-01, PNorm = 63.7309, GNorm = 1.7959, lr_0 = 1.1281e-04
Loss = 3.3047e-01, PNorm = 63.7301, GNorm = 1.2449, lr_0 = 1.1273e-04
Loss = 3.1306e-01, PNorm = 63.7304, GNorm = 1.2870, lr_0 = 1.1266e-04
Loss = 3.1457e-01, PNorm = 63.7320, GNorm = 1.3793, lr_0 = 1.1258e-04
Loss = 3.6118e-01, PNorm = 63.7337, GNorm = 1.5960, lr_0 = 1.1250e-04
Loss = 3.9047e-01, PNorm = 63.7347, GNorm = 1.2412, lr_0 = 1.1243e-04
Loss = 3.6514e-01, PNorm = 63.7357, GNorm = 1.4457, lr_0 = 1.1235e-04
Loss = 3.4167e-01, PNorm = 63.7371, GNorm = 1.7398, lr_0 = 1.1227e-04
Loss = 3.4553e-01, PNorm = 63.7384, GNorm = 1.3225, lr_0 = 1.1219e-04
Loss = 3.5737e-01, PNorm = 63.7383, GNorm = 1.0097, lr_0 = 1.1212e-04
Loss = 3.5286e-01, PNorm = 63.7388, GNorm = 1.3340, lr_0 = 1.1204e-04
Loss = 3.5554e-01, PNorm = 63.7387, GNorm = 1.3433, lr_0 = 1.1196e-04
Loss = 2.9957e-01, PNorm = 63.7389, GNorm = 1.5363, lr_0 = 1.1189e-04
Loss = 3.4582e-01, PNorm = 63.7409, GNorm = 2.5452, lr_0 = 1.1181e-04
Loss = 3.4433e-01, PNorm = 63.7422, GNorm = 1.7605, lr_0 = 1.1173e-04
Loss = 2.8029e-01, PNorm = 63.7430, GNorm = 1.4666, lr_0 = 1.1166e-04
Loss = 3.5519e-01, PNorm = 63.7429, GNorm = 1.3719, lr_0 = 1.1158e-04
Loss = 3.5514e-01, PNorm = 63.7435, GNorm = 1.9044, lr_0 = 1.1150e-04
Loss = 4.2467e-01, PNorm = 63.7432, GNorm = 1.7120, lr_0 = 1.1143e-04
Loss = 3.4098e-01, PNorm = 63.7431, GNorm = 1.4173, lr_0 = 1.1135e-04
Loss = 3.0465e-01, PNorm = 63.7445, GNorm = 1.6682, lr_0 = 1.1128e-04
Loss = 3.8131e-01, PNorm = 63.7470, GNorm = 2.0287, lr_0 = 1.1120e-04
Loss = 3.1842e-01, PNorm = 63.7483, GNorm = 1.2147, lr_0 = 1.1112e-04
Loss = 3.6762e-01, PNorm = 63.7502, GNorm = 1.6520, lr_0 = 1.1105e-04
Loss = 3.3794e-01, PNorm = 63.7504, GNorm = 1.5937, lr_0 = 1.1097e-04
Loss = 3.3022e-01, PNorm = 63.7521, GNorm = 1.4140, lr_0 = 1.1089e-04
Loss = 3.3866e-01, PNorm = 63.7551, GNorm = 1.7578, lr_0 = 1.1082e-04
Loss = 4.5548e-01, PNorm = 63.7563, GNorm = 1.5333, lr_0 = 1.1074e-04
Loss = 3.2134e-01, PNorm = 63.7567, GNorm = 1.2637, lr_0 = 1.1067e-04
Loss = 3.3674e-01, PNorm = 63.7575, GNorm = 1.5468, lr_0 = 1.1059e-04
Loss = 4.3035e-01, PNorm = 63.7572, GNorm = 1.8715, lr_0 = 1.1052e-04
Loss = 3.4548e-01, PNorm = 63.7585, GNorm = 1.2632, lr_0 = 1.1044e-04
Loss = 3.2233e-01, PNorm = 63.7605, GNorm = 1.7013, lr_0 = 1.1036e-04
Loss = 2.8906e-01, PNorm = 63.7609, GNorm = 1.0777, lr_0 = 1.1029e-04
Loss = 3.2526e-01, PNorm = 63.7609, GNorm = 1.5744, lr_0 = 1.1021e-04
Loss = 3.1837e-01, PNorm = 63.7617, GNorm = 1.4995, lr_0 = 1.1014e-04
Loss = 3.5563e-01, PNorm = 63.7618, GNorm = 1.5437, lr_0 = 1.1006e-04
Loss = 2.8417e-01, PNorm = 63.7633, GNorm = 1.4452, lr_0 = 1.0999e-04
Loss = 3.4067e-01, PNorm = 63.7649, GNorm = 1.5219, lr_0 = 1.0991e-04
Loss = 3.7285e-01, PNorm = 63.7661, GNorm = 1.5854, lr_0 = 1.0984e-04
Loss = 3.3962e-01, PNorm = 63.7674, GNorm = 1.7115, lr_0 = 1.0976e-04
Loss = 3.5818e-01, PNorm = 63.7699, GNorm = 1.4920, lr_0 = 1.0969e-04
Loss = 3.7918e-01, PNorm = 63.7686, GNorm = 1.6947, lr_0 = 1.0961e-04
Loss = 3.6924e-01, PNorm = 63.7696, GNorm = 1.1547, lr_0 = 1.0954e-04
Loss = 3.6987e-01, PNorm = 63.7715, GNorm = 1.1591, lr_0 = 1.0946e-04
Loss = 3.4883e-01, PNorm = 63.7722, GNorm = 1.5959, lr_0 = 1.0939e-04
Loss = 3.3039e-01, PNorm = 63.7724, GNorm = 1.0984, lr_0 = 1.0931e-04
Loss = 3.3651e-01, PNorm = 63.7727, GNorm = 1.2908, lr_0 = 1.0924e-04
Loss = 3.7224e-01, PNorm = 63.7736, GNorm = 1.6174, lr_0 = 1.0916e-04
Loss = 3.4062e-01, PNorm = 63.7764, GNorm = 1.5359, lr_0 = 1.0909e-04
Loss = 3.1487e-01, PNorm = 63.7779, GNorm = 1.1963, lr_0 = 1.0901e-04
Loss = 3.2411e-01, PNorm = 63.7786, GNorm = 1.5479, lr_0 = 1.0894e-04
Loss = 3.7770e-01, PNorm = 63.7799, GNorm = 1.3443, lr_0 = 1.0886e-04
Loss = 3.3707e-01, PNorm = 63.7798, GNorm = 1.4262, lr_0 = 1.0879e-04
Loss = 3.7018e-01, PNorm = 63.7797, GNorm = 2.0718, lr_0 = 1.0871e-04
Loss = 3.8875e-01, PNorm = 63.7810, GNorm = 1.3675, lr_0 = 1.0864e-04
Loss = 3.4422e-01, PNorm = 63.7827, GNorm = 1.1981, lr_0 = 1.0856e-04
Validation mae = 0.110911
Epoch 29
Loss = 3.3241e-01, PNorm = 63.7849, GNorm = 1.3186, lr_0 = 1.0849e-04
Loss = 2.9909e-01, PNorm = 63.7884, GNorm = 1.8049, lr_0 = 1.0841e-04
Loss = 3.5832e-01, PNorm = 63.7889, GNorm = 1.2284, lr_0 = 1.0834e-04
Loss = 2.9866e-01, PNorm = 63.7889, GNorm = 1.6432, lr_0 = 1.0827e-04
Loss = 3.4347e-01, PNorm = 63.7895, GNorm = 1.4081, lr_0 = 1.0819e-04
Loss = 3.4249e-01, PNorm = 63.7911, GNorm = 1.8431, lr_0 = 1.0812e-04
Loss = 3.2968e-01, PNorm = 63.7912, GNorm = 1.5962, lr_0 = 1.0804e-04
Loss = 3.2420e-01, PNorm = 63.7923, GNorm = 1.4883, lr_0 = 1.0797e-04
Loss = 3.1763e-01, PNorm = 63.7924, GNorm = 1.6465, lr_0 = 1.0790e-04
Loss = 3.2777e-01, PNorm = 63.7934, GNorm = 1.2583, lr_0 = 1.0782e-04
Loss = 3.2638e-01, PNorm = 63.7953, GNorm = 1.5552, lr_0 = 1.0775e-04
Loss = 2.9297e-01, PNorm = 63.7957, GNorm = 0.9207, lr_0 = 1.0767e-04
Loss = 3.1281e-01, PNorm = 63.7960, GNorm = 1.4364, lr_0 = 1.0760e-04
Loss = 3.4911e-01, PNorm = 63.7951, GNorm = 1.3727, lr_0 = 1.0753e-04
Loss = 3.5704e-01, PNorm = 63.7946, GNorm = 1.1310, lr_0 = 1.0745e-04
Loss = 3.6104e-01, PNorm = 63.7949, GNorm = 1.3111, lr_0 = 1.0738e-04
Loss = 3.3073e-01, PNorm = 63.7953, GNorm = 1.5851, lr_0 = 1.0731e-04
Loss = 3.1794e-01, PNorm = 63.7969, GNorm = 1.4065, lr_0 = 1.0723e-04
Loss = 3.3384e-01, PNorm = 63.7981, GNorm = 1.5331, lr_0 = 1.0716e-04
Loss = 3.6531e-01, PNorm = 63.7985, GNorm = 1.2952, lr_0 = 1.0709e-04
Loss = 3.5065e-01, PNorm = 63.7997, GNorm = 1.1748, lr_0 = 1.0701e-04
Loss = 3.4754e-01, PNorm = 63.8004, GNorm = 1.5980, lr_0 = 1.0694e-04
Loss = 2.9816e-01, PNorm = 63.8016, GNorm = 1.4781, lr_0 = 1.0687e-04
Loss = 3.2870e-01, PNorm = 63.8019, GNorm = 1.3844, lr_0 = 1.0679e-04
Loss = 3.5072e-01, PNorm = 63.8024, GNorm = 1.4217, lr_0 = 1.0672e-04
Loss = 3.2423e-01, PNorm = 63.8026, GNorm = 1.7560, lr_0 = 1.0665e-04
Loss = 3.7896e-01, PNorm = 63.8027, GNorm = 1.7371, lr_0 = 1.0657e-04
Loss = 3.1864e-01, PNorm = 63.8046, GNorm = 1.9256, lr_0 = 1.0650e-04
Loss = 3.4924e-01, PNorm = 63.8049, GNorm = 1.5822, lr_0 = 1.0643e-04
Loss = 3.3389e-01, PNorm = 63.8065, GNorm = 1.4661, lr_0 = 1.0635e-04
Loss = 3.4534e-01, PNorm = 63.8068, GNorm = 1.0840, lr_0 = 1.0628e-04
Loss = 3.2626e-01, PNorm = 63.8083, GNorm = 1.3197, lr_0 = 1.0621e-04
Loss = 2.9631e-01, PNorm = 63.8091, GNorm = 1.3958, lr_0 = 1.0614e-04
Loss = 3.2566e-01, PNorm = 63.8110, GNorm = 1.5980, lr_0 = 1.0606e-04
Loss = 3.4460e-01, PNorm = 63.8111, GNorm = 1.4673, lr_0 = 1.0599e-04
Loss = 3.8072e-01, PNorm = 63.8103, GNorm = 1.5046, lr_0 = 1.0592e-04
Loss = 3.4407e-01, PNorm = 63.8103, GNorm = 1.3576, lr_0 = 1.0585e-04
Loss = 3.4684e-01, PNorm = 63.8124, GNorm = 1.8281, lr_0 = 1.0577e-04
Loss = 2.9873e-01, PNorm = 63.8153, GNorm = 1.1244, lr_0 = 1.0570e-04
Loss = 3.5644e-01, PNorm = 63.8166, GNorm = 1.5988, lr_0 = 1.0563e-04
Loss = 3.6504e-01, PNorm = 63.8176, GNorm = 1.3319, lr_0 = 1.0556e-04
Loss = 3.2554e-01, PNorm = 63.8188, GNorm = 1.9018, lr_0 = 1.0548e-04
Loss = 3.4250e-01, PNorm = 63.8202, GNorm = 1.6128, lr_0 = 1.0541e-04
Loss = 3.5725e-01, PNorm = 63.8211, GNorm = 1.6994, lr_0 = 1.0534e-04
Loss = 3.4594e-01, PNorm = 63.8211, GNorm = 1.4151, lr_0 = 1.0527e-04
Loss = 3.6028e-01, PNorm = 63.8230, GNorm = 1.9334, lr_0 = 1.0519e-04
Loss = 3.4920e-01, PNorm = 63.8242, GNorm = 1.6343, lr_0 = 1.0512e-04
Loss = 3.1235e-01, PNorm = 63.8261, GNorm = 1.2817, lr_0 = 1.0505e-04
Loss = 3.0150e-01, PNorm = 63.8282, GNorm = 1.1921, lr_0 = 1.0498e-04
Loss = 3.9052e-01, PNorm = 63.8289, GNorm = 1.1341, lr_0 = 1.0491e-04
Loss = 3.3372e-01, PNorm = 63.8299, GNorm = 1.3501, lr_0 = 1.0483e-04
Loss = 3.8574e-01, PNorm = 63.8332, GNorm = 1.8594, lr_0 = 1.0476e-04
Loss = 3.3757e-01, PNorm = 63.8345, GNorm = 1.3334, lr_0 = 1.0469e-04
Loss = 3.0859e-01, PNorm = 63.8344, GNorm = 1.4956, lr_0 = 1.0462e-04
Loss = 3.3979e-01, PNorm = 63.8359, GNorm = 1.4685, lr_0 = 1.0455e-04
Loss = 3.7349e-01, PNorm = 63.8364, GNorm = 1.1452, lr_0 = 1.0448e-04
Loss = 4.3181e-01, PNorm = 63.8376, GNorm = 1.5717, lr_0 = 1.0440e-04
Loss = 3.2692e-01, PNorm = 63.8406, GNorm = 2.3591, lr_0 = 1.0433e-04
Loss = 3.4977e-01, PNorm = 63.8427, GNorm = 1.1863, lr_0 = 1.0426e-04
Loss = 3.7941e-01, PNorm = 63.8436, GNorm = 1.3250, lr_0 = 1.0419e-04
Loss = 2.9754e-01, PNorm = 63.8446, GNorm = 1.2319, lr_0 = 1.0412e-04
Loss = 3.7900e-01, PNorm = 63.8445, GNorm = 1.7978, lr_0 = 1.0405e-04
Loss = 3.3517e-01, PNorm = 63.8426, GNorm = 1.0227, lr_0 = 1.0398e-04
Loss = 2.9749e-01, PNorm = 63.8421, GNorm = 1.1550, lr_0 = 1.0391e-04
Loss = 3.1653e-01, PNorm = 63.8420, GNorm = 1.7012, lr_0 = 1.0383e-04
Loss = 3.5605e-01, PNorm = 63.8434, GNorm = 1.7569, lr_0 = 1.0376e-04
Loss = 3.0752e-01, PNorm = 63.8443, GNorm = 1.7360, lr_0 = 1.0369e-04
Loss = 3.5567e-01, PNorm = 63.8451, GNorm = 2.5203, lr_0 = 1.0362e-04
Loss = 3.6633e-01, PNorm = 63.8466, GNorm = 1.5312, lr_0 = 1.0355e-04
Loss = 3.4012e-01, PNorm = 63.8473, GNorm = 1.6568, lr_0 = 1.0348e-04
Loss = 3.3783e-01, PNorm = 63.8479, GNorm = 1.2907, lr_0 = 1.0341e-04
Loss = 3.7654e-01, PNorm = 63.8478, GNorm = 1.5895, lr_0 = 1.0334e-04
Loss = 3.1504e-01, PNorm = 63.8483, GNorm = 1.3678, lr_0 = 1.0327e-04
Loss = 3.5108e-01, PNorm = 63.8493, GNorm = 1.3746, lr_0 = 1.0320e-04
Loss = 3.4525e-01, PNorm = 63.8492, GNorm = 1.4124, lr_0 = 1.0312e-04
Loss = 2.7373e-01, PNorm = 63.8498, GNorm = 1.3940, lr_0 = 1.0305e-04
Loss = 3.2933e-01, PNorm = 63.8519, GNorm = 1.5386, lr_0 = 1.0298e-04
Loss = 3.2977e-01, PNorm = 63.8537, GNorm = 3.2390, lr_0 = 1.0291e-04
Loss = 4.3077e-01, PNorm = 63.8545, GNorm = 1.6507, lr_0 = 1.0284e-04
Loss = 3.5155e-01, PNorm = 63.8547, GNorm = 1.3271, lr_0 = 1.0277e-04
Loss = 3.3684e-01, PNorm = 63.8558, GNorm = 1.9946, lr_0 = 1.0270e-04
Loss = 3.5490e-01, PNorm = 63.8557, GNorm = 1.3089, lr_0 = 1.0263e-04
Loss = 3.3871e-01, PNorm = 63.8568, GNorm = 1.3684, lr_0 = 1.0256e-04
Loss = 3.6850e-01, PNorm = 63.8586, GNorm = 1.5952, lr_0 = 1.0249e-04
Loss = 3.4413e-01, PNorm = 63.8566, GNorm = 1.7087, lr_0 = 1.0242e-04
Loss = 3.8385e-01, PNorm = 63.8559, GNorm = 1.2214, lr_0 = 1.0235e-04
Loss = 3.3189e-01, PNorm = 63.8580, GNorm = 1.9268, lr_0 = 1.0228e-04
Loss = 3.4908e-01, PNorm = 63.8600, GNorm = 1.5100, lr_0 = 1.0221e-04
Loss = 3.3936e-01, PNorm = 63.8605, GNorm = 1.2660, lr_0 = 1.0214e-04
Loss = 3.3949e-01, PNorm = 63.8619, GNorm = 1.6109, lr_0 = 1.0207e-04
Loss = 3.3471e-01, PNorm = 63.8626, GNorm = 1.6060, lr_0 = 1.0200e-04
Loss = 3.6237e-01, PNorm = 63.8627, GNorm = 1.7250, lr_0 = 1.0193e-04
Loss = 3.3710e-01, PNorm = 63.8634, GNorm = 1.2055, lr_0 = 1.0186e-04
Loss = 3.4647e-01, PNorm = 63.8646, GNorm = 1.1402, lr_0 = 1.0179e-04
Loss = 3.7826e-01, PNorm = 63.8666, GNorm = 1.2962, lr_0 = 1.0172e-04
Loss = 3.6552e-01, PNorm = 63.8686, GNorm = 1.7277, lr_0 = 1.0165e-04
Loss = 3.7207e-01, PNorm = 63.8700, GNorm = 1.3389, lr_0 = 1.0158e-04
Loss = 3.3873e-01, PNorm = 63.8703, GNorm = 1.7986, lr_0 = 1.0151e-04
Loss = 3.8709e-01, PNorm = 63.8694, GNorm = 1.8548, lr_0 = 1.0144e-04
Loss = 3.5596e-01, PNorm = 63.8698, GNorm = 1.2914, lr_0 = 1.0137e-04
Loss = 3.5907e-01, PNorm = 63.8711, GNorm = 1.3466, lr_0 = 1.0130e-04
Loss = 3.5998e-01, PNorm = 63.8729, GNorm = 1.1998, lr_0 = 1.0123e-04
Loss = 3.5060e-01, PNorm = 63.8734, GNorm = 1.2292, lr_0 = 1.0116e-04
Loss = 4.0916e-01, PNorm = 63.8750, GNorm = 1.5169, lr_0 = 1.0110e-04
Loss = 3.7130e-01, PNorm = 63.8752, GNorm = 1.3104, lr_0 = 1.0103e-04
Loss = 3.5025e-01, PNorm = 63.8741, GNorm = 1.2023, lr_0 = 1.0096e-04
Loss = 3.4099e-01, PNorm = 63.8752, GNorm = 1.5981, lr_0 = 1.0089e-04
Loss = 2.9400e-01, PNorm = 63.8760, GNorm = 1.3638, lr_0 = 1.0082e-04
Loss = 3.2576e-01, PNorm = 63.8758, GNorm = 1.4181, lr_0 = 1.0075e-04
Loss = 3.3265e-01, PNorm = 63.8760, GNorm = 1.1255, lr_0 = 1.0068e-04
Loss = 3.3194e-01, PNorm = 63.8763, GNorm = 2.3161, lr_0 = 1.0061e-04
Loss = 3.4798e-01, PNorm = 63.8766, GNorm = 1.3991, lr_0 = 1.0054e-04
Loss = 3.2213e-01, PNorm = 63.8767, GNorm = 1.6048, lr_0 = 1.0047e-04
Loss = 3.0074e-01, PNorm = 63.8762, GNorm = 1.3709, lr_0 = 1.0041e-04
Loss = 3.6553e-01, PNorm = 63.8755, GNorm = 1.6079, lr_0 = 1.0034e-04
Loss = 3.0485e-01, PNorm = 63.8765, GNorm = 1.2874, lr_0 = 1.0027e-04
Loss = 3.3143e-01, PNorm = 63.8783, GNorm = 1.6652, lr_0 = 1.0020e-04
Loss = 3.2453e-01, PNorm = 63.8794, GNorm = 1.2871, lr_0 = 1.0013e-04
Loss = 3.8234e-01, PNorm = 63.8797, GNorm = 1.5976, lr_0 = 1.0006e-04
Loss = 3.3664e-01, PNorm = 63.8803, GNorm = 1.7370, lr_0 = 1.0000e-04
Validation mae = 0.110572
Model 0 best validation mae = 0.110572 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110286
Ensemble test mae = 0.110286
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.2, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=300, out_features=300, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.2, inplace=False)
    (7): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 445,201
Moving model to cuda
Epoch 0
Loss = 1.1370e+00, PNorm = 38.1704, GNorm = 4.5832, lr_0 = 1.0413e-04
Loss = 1.0944e+00, PNorm = 38.1696, GNorm = 3.5618, lr_0 = 1.0788e-04
Loss = 9.2282e-01, PNorm = 38.1693, GNorm = 1.9259, lr_0 = 1.1163e-04
Loss = 9.1047e-01, PNorm = 38.1696, GNorm = 2.7924, lr_0 = 1.1537e-04
Loss = 9.2325e-01, PNorm = 38.1704, GNorm = 3.8378, lr_0 = 1.1913e-04
Loss = 9.9446e-01, PNorm = 38.1714, GNorm = 2.9154, lr_0 = 1.2287e-04
Loss = 9.5521e-01, PNorm = 38.1720, GNorm = 2.1801, lr_0 = 1.2663e-04
Loss = 9.3866e-01, PNorm = 38.1727, GNorm = 2.1779, lr_0 = 1.3038e-04
Loss = 9.0206e-01, PNorm = 38.1737, GNorm = 1.9921, lr_0 = 1.3413e-04
Loss = 9.1941e-01, PNorm = 38.1745, GNorm = 3.5802, lr_0 = 1.3788e-04
Loss = 8.9948e-01, PNorm = 38.1758, GNorm = 3.2464, lr_0 = 1.4163e-04
Loss = 7.6800e-01, PNorm = 38.1769, GNorm = 1.8986, lr_0 = 1.4537e-04
Loss = 8.8393e-01, PNorm = 38.1787, GNorm = 5.0033, lr_0 = 1.4913e-04
Loss = 9.4883e-01, PNorm = 38.1807, GNorm = 2.1072, lr_0 = 1.5288e-04
Loss = 9.9224e-01, PNorm = 38.1829, GNorm = 2.7155, lr_0 = 1.5662e-04
Loss = 7.1147e-01, PNorm = 38.1851, GNorm = 1.9192, lr_0 = 1.6038e-04
Loss = 8.6763e-01, PNorm = 38.1875, GNorm = 2.7225, lr_0 = 1.6412e-04
Loss = 8.1213e-01, PNorm = 38.1909, GNorm = 11.2497, lr_0 = 1.6788e-04
Loss = 8.3851e-01, PNorm = 38.1931, GNorm = 2.0140, lr_0 = 1.7163e-04
Loss = 7.9988e-01, PNorm = 38.1957, GNorm = 6.5782, lr_0 = 1.7538e-04
Loss = 8.1715e-01, PNorm = 38.1994, GNorm = 2.8043, lr_0 = 1.7913e-04
Loss = 8.4356e-01, PNorm = 38.2030, GNorm = 2.5188, lr_0 = 1.8288e-04
Loss = 6.6484e-01, PNorm = 38.2063, GNorm = 3.5469, lr_0 = 1.8662e-04
Loss = 7.2696e-01, PNorm = 38.2094, GNorm = 3.0732, lr_0 = 1.9038e-04
Loss = 7.6129e-01, PNorm = 38.2114, GNorm = 3.3665, lr_0 = 1.9413e-04
Loss = 7.8153e-01, PNorm = 38.2125, GNorm = 1.5433, lr_0 = 1.9788e-04
Loss = 8.0947e-01, PNorm = 38.2159, GNorm = 2.9979, lr_0 = 2.0163e-04
Loss = 7.0995e-01, PNorm = 38.2198, GNorm = 1.6900, lr_0 = 2.0537e-04
Loss = 7.8342e-01, PNorm = 38.2240, GNorm = 5.1046, lr_0 = 2.0913e-04
Loss = 7.7649e-01, PNorm = 38.2281, GNorm = 2.6193, lr_0 = 2.1288e-04
Loss = 7.1460e-01, PNorm = 38.2326, GNorm = 2.0192, lr_0 = 2.1663e-04
Loss = 6.9799e-01, PNorm = 38.2357, GNorm = 2.8240, lr_0 = 2.2038e-04
Loss = 7.4407e-01, PNorm = 38.2382, GNorm = 2.7994, lr_0 = 2.2412e-04
Loss = 7.7080e-01, PNorm = 38.2403, GNorm = 3.6452, lr_0 = 2.2787e-04
Loss = 6.7061e-01, PNorm = 38.2439, GNorm = 1.7533, lr_0 = 2.3163e-04
Loss = 6.5004e-01, PNorm = 38.2459, GNorm = 1.7275, lr_0 = 2.3538e-04
Loss = 5.9750e-01, PNorm = 38.2499, GNorm = 3.0119, lr_0 = 2.3913e-04
Loss = 7.0287e-01, PNorm = 38.2562, GNorm = 1.6385, lr_0 = 2.4288e-04
Loss = 6.9013e-01, PNorm = 38.2589, GNorm = 5.7263, lr_0 = 2.4662e-04
Loss = 6.6088e-01, PNorm = 38.2612, GNorm = 6.1367, lr_0 = 2.5038e-04
Loss = 7.1983e-01, PNorm = 38.2645, GNorm = 7.7572, lr_0 = 2.5413e-04
Loss = 6.9495e-01, PNorm = 38.2661, GNorm = 3.8148, lr_0 = 2.5788e-04
Loss = 7.1933e-01, PNorm = 38.2711, GNorm = 3.7822, lr_0 = 2.6163e-04
Loss = 6.9393e-01, PNorm = 38.2742, GNorm = 1.6260, lr_0 = 2.6537e-04
Loss = 7.4428e-01, PNorm = 38.2785, GNorm = 1.9661, lr_0 = 2.6912e-04
Loss = 7.4645e-01, PNorm = 38.2817, GNorm = 2.8700, lr_0 = 2.7288e-04
Loss = 6.7615e-01, PNorm = 38.2854, GNorm = 1.6366, lr_0 = 2.7663e-04
Loss = 7.0844e-01, PNorm = 38.2876, GNorm = 2.6195, lr_0 = 2.8038e-04
Loss = 7.9296e-01, PNorm = 38.2900, GNorm = 10.5209, lr_0 = 2.8413e-04
Loss = 7.8205e-01, PNorm = 38.2920, GNorm = 1.8546, lr_0 = 2.8787e-04
Loss = 7.9891e-01, PNorm = 38.2940, GNorm = 8.3000, lr_0 = 2.9163e-04
Loss = 5.8836e-01, PNorm = 38.2981, GNorm = 2.0042, lr_0 = 2.9538e-04
Loss = 6.8432e-01, PNorm = 38.3041, GNorm = 2.3989, lr_0 = 2.9913e-04
Loss = 6.8722e-01, PNorm = 38.3106, GNorm = 3.6360, lr_0 = 3.0288e-04
Loss = 6.6108e-01, PNorm = 38.3164, GNorm = 1.1176, lr_0 = 3.0662e-04
Loss = 6.6979e-01, PNorm = 38.3197, GNorm = 3.0524, lr_0 = 3.1037e-04
Loss = 6.5599e-01, PNorm = 38.3250, GNorm = 2.9423, lr_0 = 3.1413e-04
Loss = 6.3796e-01, PNorm = 38.3304, GNorm = 5.4247, lr_0 = 3.1788e-04
Loss = 6.6951e-01, PNorm = 38.3348, GNorm = 2.2691, lr_0 = 3.2163e-04
Loss = 6.5233e-01, PNorm = 38.3393, GNorm = 1.7078, lr_0 = 3.2538e-04
Loss = 7.5250e-01, PNorm = 38.3444, GNorm = 4.4221, lr_0 = 3.2912e-04
Loss = 7.8240e-01, PNorm = 38.3479, GNorm = 6.2343, lr_0 = 3.3288e-04
Loss = 6.5848e-01, PNorm = 38.3520, GNorm = 1.8864, lr_0 = 3.3663e-04
Loss = 6.5710e-01, PNorm = 38.3563, GNorm = 1.4544, lr_0 = 3.4038e-04
Loss = 7.2569e-01, PNorm = 38.3617, GNorm = 3.7897, lr_0 = 3.4413e-04
Loss = 6.7712e-01, PNorm = 38.3643, GNorm = 3.4054, lr_0 = 3.4787e-04
Loss = 6.1952e-01, PNorm = 38.3695, GNorm = 2.1958, lr_0 = 3.5162e-04
Loss = 6.1937e-01, PNorm = 38.3756, GNorm = 11.0706, lr_0 = 3.5538e-04
Loss = 7.3506e-01, PNorm = 38.3782, GNorm = 3.8727, lr_0 = 3.5913e-04
Loss = 7.0075e-01, PNorm = 38.3814, GNorm = 2.6095, lr_0 = 3.6288e-04
Loss = 7.0523e-01, PNorm = 38.3867, GNorm = 2.1751, lr_0 = 3.6662e-04
Loss = 7.1396e-01, PNorm = 38.3912, GNorm = 2.4953, lr_0 = 3.7037e-04
Loss = 5.7063e-01, PNorm = 38.3983, GNorm = 2.1977, lr_0 = 3.7413e-04
Loss = 6.7603e-01, PNorm = 38.4032, GNorm = 5.6441, lr_0 = 3.7788e-04
Loss = 6.9922e-01, PNorm = 38.4079, GNorm = 3.8527, lr_0 = 3.8163e-04
Loss = 6.2597e-01, PNorm = 38.4177, GNorm = 3.2750, lr_0 = 3.8537e-04
Loss = 6.2355e-01, PNorm = 38.4241, GNorm = 4.8741, lr_0 = 3.8912e-04
Loss = 6.7829e-01, PNorm = 38.4307, GNorm = 3.7380, lr_0 = 3.9287e-04
Loss = 7.6866e-01, PNorm = 38.4368, GNorm = 2.0908, lr_0 = 3.9663e-04
Loss = 6.5625e-01, PNorm = 38.4430, GNorm = 4.6647, lr_0 = 4.0038e-04
Loss = 6.9704e-01, PNorm = 38.4511, GNorm = 3.0602, lr_0 = 4.0413e-04
Loss = 6.0986e-01, PNorm = 38.4583, GNorm = 2.2202, lr_0 = 4.0787e-04
Loss = 6.7105e-01, PNorm = 38.4621, GNorm = 1.6293, lr_0 = 4.1162e-04
Loss = 6.0402e-01, PNorm = 38.4656, GNorm = 4.8817, lr_0 = 4.1537e-04
Loss = 7.0674e-01, PNorm = 38.4726, GNorm = 3.4319, lr_0 = 4.1913e-04
Loss = 5.9397e-01, PNorm = 38.4814, GNorm = 1.9048, lr_0 = 4.2288e-04
Loss = 6.4293e-01, PNorm = 38.4905, GNorm = 3.8751, lr_0 = 4.2662e-04
Loss = 6.6041e-01, PNorm = 38.5005, GNorm = 4.2554, lr_0 = 4.3037e-04
Loss = 6.8397e-01, PNorm = 38.5045, GNorm = 7.6720, lr_0 = 4.3412e-04
Loss = 7.5566e-01, PNorm = 38.5098, GNorm = 5.4558, lr_0 = 4.3788e-04
Loss = 6.4175e-01, PNorm = 38.5196, GNorm = 3.7129, lr_0 = 4.4163e-04
Loss = 6.9901e-01, PNorm = 38.5250, GNorm = 3.3751, lr_0 = 4.4538e-04
Loss = 7.4134e-01, PNorm = 38.5304, GNorm = 1.6427, lr_0 = 4.4912e-04
Loss = 6.8892e-01, PNorm = 38.5392, GNorm = 2.0538, lr_0 = 4.5287e-04
Loss = 6.4566e-01, PNorm = 38.5510, GNorm = 4.1702, lr_0 = 4.5662e-04
Loss = 7.2525e-01, PNorm = 38.5575, GNorm = 2.6659, lr_0 = 4.6038e-04
Loss = 5.9045e-01, PNorm = 38.5653, GNorm = 1.9296, lr_0 = 4.6413e-04
Loss = 6.7112e-01, PNorm = 38.5711, GNorm = 2.1264, lr_0 = 4.6787e-04
Loss = 6.7126e-01, PNorm = 38.5773, GNorm = 3.2028, lr_0 = 4.7162e-04
Loss = 5.9490e-01, PNorm = 38.5874, GNorm = 2.6838, lr_0 = 4.7537e-04
Loss = 6.6952e-01, PNorm = 38.5927, GNorm = 3.3400, lr_0 = 4.7913e-04
Loss = 6.6203e-01, PNorm = 38.6001, GNorm = 2.5706, lr_0 = 4.8288e-04
Loss = 7.5272e-01, PNorm = 38.6063, GNorm = 7.2152, lr_0 = 4.8663e-04
Loss = 6.7912e-01, PNorm = 38.6145, GNorm = 1.5176, lr_0 = 4.9038e-04
Loss = 5.9565e-01, PNorm = 38.6257, GNorm = 1.4579, lr_0 = 4.9412e-04
Loss = 5.9457e-01, PNorm = 38.6337, GNorm = 1.7930, lr_0 = 4.9788e-04
Loss = 6.8883e-01, PNorm = 38.6452, GNorm = 3.4282, lr_0 = 5.0163e-04
Loss = 7.1314e-01, PNorm = 38.6532, GNorm = 2.6200, lr_0 = 5.0538e-04
Loss = 6.5806e-01, PNorm = 38.6634, GNorm = 5.6477, lr_0 = 5.0913e-04
Loss = 6.0350e-01, PNorm = 38.6774, GNorm = 3.4196, lr_0 = 5.1287e-04
Loss = 5.9059e-01, PNorm = 38.6860, GNorm = 1.3053, lr_0 = 5.1663e-04
Loss = 6.7194e-01, PNorm = 38.6943, GNorm = 1.7350, lr_0 = 5.2038e-04
Loss = 5.3270e-01, PNorm = 38.7034, GNorm = 3.5111, lr_0 = 5.2413e-04
Loss = 7.7948e-01, PNorm = 38.7088, GNorm = 2.7720, lr_0 = 5.2788e-04
Loss = 6.3488e-01, PNorm = 38.7157, GNorm = 3.2163, lr_0 = 5.3162e-04
Loss = 6.4683e-01, PNorm = 38.7218, GNorm = 1.3124, lr_0 = 5.3538e-04
Loss = 6.2380e-01, PNorm = 38.7313, GNorm = 1.4490, lr_0 = 5.3912e-04
Loss = 7.3518e-01, PNorm = 38.7435, GNorm = 1.8104, lr_0 = 5.4288e-04
Loss = 5.7859e-01, PNorm = 38.7536, GNorm = 1.7632, lr_0 = 5.4663e-04
Loss = 5.6350e-01, PNorm = 38.7627, GNorm = 1.6913, lr_0 = 5.5038e-04
Validation mae = 0.139894
Epoch 1
Loss = 6.1651e-01, PNorm = 38.7680, GNorm = 6.2758, lr_0 = 5.5413e-04
Loss = 5.4347e-01, PNorm = 38.7781, GNorm = 1.4987, lr_0 = 5.5787e-04
Loss = 5.9716e-01, PNorm = 38.7894, GNorm = 3.1666, lr_0 = 5.6163e-04
Loss = 6.9078e-01, PNorm = 38.8016, GNorm = 7.9298, lr_0 = 5.6538e-04
Loss = 5.7438e-01, PNorm = 38.8114, GNorm = 5.2974, lr_0 = 5.6913e-04
Loss = 6.1799e-01, PNorm = 38.8283, GNorm = 6.3335, lr_0 = 5.7288e-04
Loss = 6.3372e-01, PNorm = 38.8417, GNorm = 2.3920, lr_0 = 5.7662e-04
Loss = 5.8741e-01, PNorm = 38.8529, GNorm = 1.5776, lr_0 = 5.8038e-04
Loss = 5.7813e-01, PNorm = 38.8681, GNorm = 4.8757, lr_0 = 5.8413e-04
Loss = 7.6574e-01, PNorm = 38.8745, GNorm = 2.6394, lr_0 = 5.8788e-04
Loss = 5.7424e-01, PNorm = 38.8867, GNorm = 2.5414, lr_0 = 5.9163e-04
Loss = 6.0837e-01, PNorm = 38.8984, GNorm = 3.5083, lr_0 = 5.9538e-04
Loss = 5.2118e-01, PNorm = 38.9086, GNorm = 3.3316, lr_0 = 5.9913e-04
Loss = 6.3407e-01, PNorm = 38.9231, GNorm = 3.0095, lr_0 = 6.0288e-04
Loss = 6.1117e-01, PNorm = 38.9370, GNorm = 1.1652, lr_0 = 6.0663e-04
Loss = 5.8505e-01, PNorm = 38.9496, GNorm = 2.8417, lr_0 = 6.1038e-04
Loss = 5.9265e-01, PNorm = 38.9614, GNorm = 2.2104, lr_0 = 6.1413e-04
Loss = 5.8853e-01, PNorm = 38.9780, GNorm = 3.1164, lr_0 = 6.1788e-04
Loss = 6.5402e-01, PNorm = 38.9910, GNorm = 4.4557, lr_0 = 6.2163e-04
Loss = 5.5377e-01, PNorm = 39.0041, GNorm = 7.3994, lr_0 = 6.2538e-04
Loss = 5.8274e-01, PNorm = 39.0155, GNorm = 2.4586, lr_0 = 6.2913e-04
Loss = 5.9073e-01, PNorm = 39.0223, GNorm = 1.9106, lr_0 = 6.3288e-04
Loss = 5.9534e-01, PNorm = 39.0363, GNorm = 4.8620, lr_0 = 6.3663e-04
Loss = 6.0217e-01, PNorm = 39.0470, GNorm = 2.1816, lr_0 = 6.4038e-04
Loss = 6.2191e-01, PNorm = 39.0546, GNorm = 1.9655, lr_0 = 6.4413e-04
Loss = 6.3187e-01, PNorm = 39.0666, GNorm = 2.2977, lr_0 = 6.4788e-04
Loss = 5.7044e-01, PNorm = 39.0795, GNorm = 1.8390, lr_0 = 6.5163e-04
Loss = 6.2802e-01, PNorm = 39.0922, GNorm = 3.2773, lr_0 = 6.5538e-04
Loss = 5.3455e-01, PNorm = 39.1028, GNorm = 5.9713, lr_0 = 6.5913e-04
Loss = 5.4653e-01, PNorm = 39.1126, GNorm = 1.4914, lr_0 = 6.6288e-04
Loss = 6.3826e-01, PNorm = 39.1258, GNorm = 1.4856, lr_0 = 6.6663e-04
Loss = 5.6436e-01, PNorm = 39.1367, GNorm = 5.6749, lr_0 = 6.7038e-04
Loss = 6.0655e-01, PNorm = 39.1505, GNorm = 4.1245, lr_0 = 6.7413e-04
Loss = 5.8201e-01, PNorm = 39.1641, GNorm = 2.7751, lr_0 = 6.7788e-04
Loss = 6.5881e-01, PNorm = 39.1807, GNorm = 4.8697, lr_0 = 6.8163e-04
Loss = 5.7578e-01, PNorm = 39.1895, GNorm = 3.3398, lr_0 = 6.8538e-04
Loss = 6.2826e-01, PNorm = 39.2043, GNorm = 1.3945, lr_0 = 6.8913e-04
Loss = 6.2328e-01, PNorm = 39.2185, GNorm = 5.7105, lr_0 = 6.9288e-04
Loss = 7.3386e-01, PNorm = 39.2312, GNorm = 1.9288, lr_0 = 6.9663e-04
Loss = 6.8206e-01, PNorm = 39.2398, GNorm = 2.9859, lr_0 = 7.0038e-04
Loss = 6.1190e-01, PNorm = 39.2561, GNorm = 1.4343, lr_0 = 7.0413e-04
Loss = 5.8377e-01, PNorm = 39.2753, GNorm = 1.3464, lr_0 = 7.0788e-04
Loss = 5.6517e-01, PNorm = 39.2873, GNorm = 0.9360, lr_0 = 7.1163e-04
Loss = 6.0335e-01, PNorm = 39.2974, GNorm = 2.8839, lr_0 = 7.1538e-04
Loss = 6.0857e-01, PNorm = 39.3069, GNorm = 2.4298, lr_0 = 7.1913e-04
Loss = 5.9351e-01, PNorm = 39.3186, GNorm = 3.9839, lr_0 = 7.2288e-04
Loss = 6.1534e-01, PNorm = 39.3345, GNorm = 2.3429, lr_0 = 7.2663e-04
Loss = 6.3630e-01, PNorm = 39.3464, GNorm = 2.7571, lr_0 = 7.3038e-04
Loss = 5.7472e-01, PNorm = 39.3573, GNorm = 2.4429, lr_0 = 7.3413e-04
Loss = 6.1098e-01, PNorm = 39.3727, GNorm = 5.6747, lr_0 = 7.3788e-04
Loss = 6.1058e-01, PNorm = 39.3836, GNorm = 1.3033, lr_0 = 7.4163e-04
Loss = 6.1963e-01, PNorm = 39.3952, GNorm = 2.2336, lr_0 = 7.4538e-04
Loss = 5.8595e-01, PNorm = 39.4132, GNorm = 2.0618, lr_0 = 7.4913e-04
Loss = 5.6853e-01, PNorm = 39.4212, GNorm = 1.6578, lr_0 = 7.5288e-04
Loss = 6.2019e-01, PNorm = 39.4377, GNorm = 2.5751, lr_0 = 7.5663e-04
Loss = 5.8671e-01, PNorm = 39.4481, GNorm = 5.5086, lr_0 = 7.6038e-04
Loss = 5.8273e-01, PNorm = 39.4720, GNorm = 2.4386, lr_0 = 7.6413e-04
Loss = 6.3434e-01, PNorm = 39.4834, GNorm = 1.8310, lr_0 = 7.6788e-04
Loss = 5.7750e-01, PNorm = 39.4980, GNorm = 1.1931, lr_0 = 7.7163e-04
Loss = 5.4167e-01, PNorm = 39.5087, GNorm = 1.5423, lr_0 = 7.7538e-04
Loss = 6.6971e-01, PNorm = 39.5184, GNorm = 1.4613, lr_0 = 7.7913e-04
Loss = 5.7920e-01, PNorm = 39.5320, GNorm = 2.1894, lr_0 = 7.8288e-04
Loss = 6.2965e-01, PNorm = 39.5431, GNorm = 3.5598, lr_0 = 7.8663e-04
Loss = 5.7460e-01, PNorm = 39.5612, GNorm = 4.0405, lr_0 = 7.9038e-04
Loss = 6.4407e-01, PNorm = 39.5731, GNorm = 1.1724, lr_0 = 7.9413e-04
Loss = 5.7494e-01, PNorm = 39.5937, GNorm = 1.8999, lr_0 = 7.9788e-04
Loss = 5.5141e-01, PNorm = 39.6028, GNorm = 2.4387, lr_0 = 8.0163e-04
Loss = 5.6819e-01, PNorm = 39.6207, GNorm = 2.1504, lr_0 = 8.0538e-04
Loss = 6.7981e-01, PNorm = 39.6422, GNorm = 1.9487, lr_0 = 8.0913e-04
Loss = 6.2962e-01, PNorm = 39.6666, GNorm = 5.0161, lr_0 = 8.1288e-04
Loss = 5.1733e-01, PNorm = 39.6743, GNorm = 3.8798, lr_0 = 8.1663e-04
Loss = 6.4847e-01, PNorm = 39.6913, GNorm = 1.4345, lr_0 = 8.2038e-04
Loss = 6.4148e-01, PNorm = 39.7112, GNorm = 5.5626, lr_0 = 8.2413e-04
Loss = 5.4946e-01, PNorm = 39.7284, GNorm = 2.1582, lr_0 = 8.2788e-04
Loss = 6.1470e-01, PNorm = 39.7461, GNorm = 1.4751, lr_0 = 8.3163e-04
Loss = 5.6085e-01, PNorm = 39.7661, GNorm = 4.3141, lr_0 = 8.3538e-04
Loss = 6.4143e-01, PNorm = 39.7784, GNorm = 3.0453, lr_0 = 8.3913e-04
Loss = 5.3287e-01, PNorm = 39.7997, GNorm = 2.7546, lr_0 = 8.4288e-04
Loss = 5.0630e-01, PNorm = 39.8097, GNorm = 1.1143, lr_0 = 8.4663e-04
Loss = 5.2148e-01, PNorm = 39.8279, GNorm = 2.1088, lr_0 = 8.5038e-04
Loss = 5.4640e-01, PNorm = 39.8432, GNorm = 2.2477, lr_0 = 8.5413e-04
Loss = 6.0249e-01, PNorm = 39.8589, GNorm = 3.0267, lr_0 = 8.5788e-04
Loss = 5.1604e-01, PNorm = 39.8847, GNorm = 1.3025, lr_0 = 8.6163e-04
Loss = 6.0111e-01, PNorm = 39.9095, GNorm = 3.0043, lr_0 = 8.6538e-04
Loss = 5.6276e-01, PNorm = 39.9259, GNorm = 1.7738, lr_0 = 8.6913e-04
Loss = 5.5463e-01, PNorm = 39.9399, GNorm = 1.4125, lr_0 = 8.7288e-04
Loss = 6.6996e-01, PNorm = 39.9595, GNorm = 2.0073, lr_0 = 8.7663e-04
Loss = 6.6426e-01, PNorm = 39.9725, GNorm = 2.6726, lr_0 = 8.8038e-04
Loss = 5.4595e-01, PNorm = 39.9920, GNorm = 0.8693, lr_0 = 8.8413e-04
Loss = 5.7728e-01, PNorm = 40.0126, GNorm = 1.2615, lr_0 = 8.8788e-04
Loss = 6.1913e-01, PNorm = 40.0310, GNorm = 1.4272, lr_0 = 8.9163e-04
Loss = 6.5052e-01, PNorm = 40.0512, GNorm = 6.6906, lr_0 = 8.9538e-04
Loss = 5.5830e-01, PNorm = 40.0708, GNorm = 2.0810, lr_0 = 8.9913e-04
Loss = 4.7034e-01, PNorm = 40.0948, GNorm = 2.7038, lr_0 = 9.0288e-04
Loss = 5.6598e-01, PNorm = 40.1061, GNorm = 2.1191, lr_0 = 9.0663e-04
Loss = 5.9931e-01, PNorm = 40.1218, GNorm = 1.2403, lr_0 = 9.1038e-04
Loss = 5.6364e-01, PNorm = 40.1434, GNorm = 3.3366, lr_0 = 9.1413e-04
Loss = 6.3761e-01, PNorm = 40.1655, GNorm = 2.3368, lr_0 = 9.1788e-04
Loss = 5.6935e-01, PNorm = 40.1898, GNorm = 2.4894, lr_0 = 9.2163e-04
Loss = 5.3910e-01, PNorm = 40.2075, GNorm = 4.0151, lr_0 = 9.2538e-04
Loss = 6.1746e-01, PNorm = 40.2165, GNorm = 1.3468, lr_0 = 9.2913e-04
Loss = 5.5950e-01, PNorm = 40.2343, GNorm = 1.0145, lr_0 = 9.3288e-04
Loss = 5.5495e-01, PNorm = 40.2616, GNorm = 2.4016, lr_0 = 9.3663e-04
Loss = 5.1575e-01, PNorm = 40.2770, GNorm = 1.1374, lr_0 = 9.4038e-04
Loss = 5.6257e-01, PNorm = 40.2871, GNorm = 1.8886, lr_0 = 9.4413e-04
Loss = 5.5202e-01, PNorm = 40.3033, GNorm = 1.4855, lr_0 = 9.4788e-04
Loss = 5.7526e-01, PNorm = 40.3215, GNorm = 1.5733, lr_0 = 9.5163e-04
Loss = 5.4624e-01, PNorm = 40.3443, GNorm = 1.4803, lr_0 = 9.5538e-04
Loss = 5.7262e-01, PNorm = 40.3655, GNorm = 2.0567, lr_0 = 9.5913e-04
Loss = 5.1720e-01, PNorm = 40.3847, GNorm = 2.4580, lr_0 = 9.6288e-04
Loss = 5.6040e-01, PNorm = 40.4087, GNorm = 1.5923, lr_0 = 9.6663e-04
Loss = 5.2645e-01, PNorm = 40.4242, GNorm = 4.8222, lr_0 = 9.7038e-04
Loss = 6.5783e-01, PNorm = 40.4509, GNorm = 5.1734, lr_0 = 9.7413e-04
Loss = 5.9748e-01, PNorm = 40.4815, GNorm = 3.2909, lr_0 = 9.7788e-04
Loss = 6.2432e-01, PNorm = 40.5095, GNorm = 2.4062, lr_0 = 9.8163e-04
Loss = 5.8515e-01, PNorm = 40.5437, GNorm = 4.1761, lr_0 = 9.8537e-04
Loss = 5.5862e-01, PNorm = 40.5594, GNorm = 3.7314, lr_0 = 9.8912e-04
Loss = 5.6279e-01, PNorm = 40.5820, GNorm = 2.4344, lr_0 = 9.9288e-04
Loss = 4.8767e-01, PNorm = 40.5965, GNorm = 1.3828, lr_0 = 9.9663e-04
Loss = 5.5770e-01, PNorm = 40.6125, GNorm = 3.8640, lr_0 = 9.9993e-04
Validation mae = 0.132049
Epoch 2
Loss = 5.4083e-01, PNorm = 40.6336, GNorm = 3.6431, lr_0 = 9.9925e-04
Loss = 5.5809e-01, PNorm = 40.6598, GNorm = 1.7539, lr_0 = 9.9856e-04
Loss = 6.7998e-01, PNorm = 40.6861, GNorm = 4.6533, lr_0 = 9.9788e-04
Loss = 6.8067e-01, PNorm = 40.7098, GNorm = 1.2923, lr_0 = 9.9719e-04
Loss = 5.4867e-01, PNorm = 40.7280, GNorm = 1.8046, lr_0 = 9.9651e-04
Loss = 6.4135e-01, PNorm = 40.7576, GNorm = 2.8742, lr_0 = 9.9583e-04
Loss = 5.2993e-01, PNorm = 40.7824, GNorm = 2.6704, lr_0 = 9.9515e-04
Loss = 5.4829e-01, PNorm = 40.7970, GNorm = 2.0197, lr_0 = 9.9446e-04
Loss = 5.6268e-01, PNorm = 40.8119, GNorm = 3.8176, lr_0 = 9.9378e-04
Loss = 5.5543e-01, PNorm = 40.8353, GNorm = 1.7113, lr_0 = 9.9310e-04
Loss = 5.1507e-01, PNorm = 40.8528, GNorm = 3.4151, lr_0 = 9.9242e-04
Loss = 4.9041e-01, PNorm = 40.8719, GNorm = 1.9776, lr_0 = 9.9174e-04
Loss = 5.2009e-01, PNorm = 40.8887, GNorm = 5.2321, lr_0 = 9.9106e-04
Loss = 5.4288e-01, PNorm = 40.9032, GNorm = 1.7153, lr_0 = 9.9038e-04
Loss = 5.4840e-01, PNorm = 40.9286, GNorm = 3.1320, lr_0 = 9.8971e-04
Loss = 6.1554e-01, PNorm = 40.9549, GNorm = 1.8250, lr_0 = 9.8903e-04
Loss = 4.9006e-01, PNorm = 40.9781, GNorm = 0.7794, lr_0 = 9.8835e-04
Loss = 5.1055e-01, PNorm = 40.9973, GNorm = 1.0674, lr_0 = 9.8767e-04
Loss = 5.6379e-01, PNorm = 41.0197, GNorm = 1.2935, lr_0 = 9.8700e-04
Loss = 5.0158e-01, PNorm = 41.0362, GNorm = 2.2766, lr_0 = 9.8632e-04
Loss = 5.1718e-01, PNorm = 41.0566, GNorm = 2.0541, lr_0 = 9.8564e-04
Loss = 5.0715e-01, PNorm = 41.0778, GNorm = 1.2499, lr_0 = 9.8497e-04
Loss = 5.4705e-01, PNorm = 41.0945, GNorm = 2.1963, lr_0 = 9.8429e-04
Loss = 5.2803e-01, PNorm = 41.1197, GNorm = 3.2734, lr_0 = 9.8362e-04
Loss = 5.5293e-01, PNorm = 41.1438, GNorm = 3.3840, lr_0 = 9.8295e-04
Loss = 5.8634e-01, PNorm = 41.1610, GNorm = 2.0898, lr_0 = 9.8227e-04
Loss = 5.3527e-01, PNorm = 41.1874, GNorm = 1.9090, lr_0 = 9.8160e-04
Loss = 5.2311e-01, PNorm = 41.2080, GNorm = 1.3750, lr_0 = 9.8093e-04
Loss = 5.5837e-01, PNorm = 41.2263, GNorm = 1.5828, lr_0 = 9.8026e-04
Loss = 6.2433e-01, PNorm = 41.2323, GNorm = 1.6373, lr_0 = 9.7958e-04
Loss = 6.1214e-01, PNorm = 41.2577, GNorm = 1.4802, lr_0 = 9.7891e-04
Loss = 6.2543e-01, PNorm = 41.2928, GNorm = 2.8285, lr_0 = 9.7824e-04
Loss = 5.8685e-01, PNorm = 41.3308, GNorm = 1.4061, lr_0 = 9.7757e-04
Loss = 5.5146e-01, PNorm = 41.3589, GNorm = 1.2150, lr_0 = 9.7690e-04
Loss = 5.1858e-01, PNorm = 41.3725, GNorm = 2.7215, lr_0 = 9.7623e-04
Loss = 4.9568e-01, PNorm = 41.3892, GNorm = 2.0666, lr_0 = 9.7556e-04
Loss = 5.1425e-01, PNorm = 41.3984, GNorm = 4.0756, lr_0 = 9.7490e-04
Loss = 4.9526e-01, PNorm = 41.4185, GNorm = 1.7248, lr_0 = 9.7423e-04
Loss = 6.4875e-01, PNorm = 41.4457, GNorm = 3.1196, lr_0 = 9.7356e-04
Loss = 6.1298e-01, PNorm = 41.4642, GNorm = 1.5791, lr_0 = 9.7289e-04
Loss = 5.4924e-01, PNorm = 41.4925, GNorm = 2.7128, lr_0 = 9.7223e-04
Loss = 5.2596e-01, PNorm = 41.5158, GNorm = 3.4417, lr_0 = 9.7156e-04
Loss = 6.6302e-01, PNorm = 41.5340, GNorm = 3.1255, lr_0 = 9.7090e-04
Loss = 5.8641e-01, PNorm = 41.5563, GNorm = 2.0726, lr_0 = 9.7023e-04
Loss = 6.4949e-01, PNorm = 41.5785, GNorm = 2.7498, lr_0 = 9.6957e-04
Loss = 5.5739e-01, PNorm = 41.5990, GNorm = 1.7218, lr_0 = 9.6890e-04
Loss = 4.5802e-01, PNorm = 41.6236, GNorm = 1.2607, lr_0 = 9.6824e-04
Loss = 5.8717e-01, PNorm = 41.6503, GNorm = 1.0157, lr_0 = 9.6757e-04
Loss = 4.9495e-01, PNorm = 41.6763, GNorm = 0.8381, lr_0 = 9.6691e-04
Loss = 5.6091e-01, PNorm = 41.6904, GNorm = 1.4852, lr_0 = 9.6625e-04
Loss = 5.6358e-01, PNorm = 41.7064, GNorm = 1.0923, lr_0 = 9.6559e-04
Loss = 5.3925e-01, PNorm = 41.7286, GNorm = 2.2232, lr_0 = 9.6493e-04
Loss = 5.2098e-01, PNorm = 41.7561, GNorm = 1.6096, lr_0 = 9.6427e-04
Loss = 6.0787e-01, PNorm = 41.7770, GNorm = 1.4282, lr_0 = 9.6360e-04
Loss = 5.7116e-01, PNorm = 41.7930, GNorm = 1.8851, lr_0 = 9.6294e-04
Loss = 5.6805e-01, PNorm = 41.8088, GNorm = 3.5573, lr_0 = 9.6228e-04
Loss = 5.3251e-01, PNorm = 41.8247, GNorm = 2.0834, lr_0 = 9.6163e-04
Loss = 5.3131e-01, PNorm = 41.8461, GNorm = 3.9806, lr_0 = 9.6097e-04
Loss = 4.8955e-01, PNorm = 41.8657, GNorm = 1.2673, lr_0 = 9.6031e-04
Loss = 4.6129e-01, PNorm = 41.8804, GNorm = 1.6995, lr_0 = 9.5965e-04
Loss = 5.2870e-01, PNorm = 41.8920, GNorm = 1.6587, lr_0 = 9.5899e-04
Loss = 5.6378e-01, PNorm = 41.9137, GNorm = 3.4915, lr_0 = 9.5834e-04
Loss = 5.3841e-01, PNorm = 41.9295, GNorm = 0.9938, lr_0 = 9.5768e-04
Loss = 5.7062e-01, PNorm = 41.9536, GNorm = 1.0290, lr_0 = 9.5702e-04
Loss = 4.7624e-01, PNorm = 41.9725, GNorm = 1.3557, lr_0 = 9.5637e-04
Loss = 5.5157e-01, PNorm = 41.9929, GNorm = 2.5715, lr_0 = 9.5571e-04
Loss = 5.4157e-01, PNorm = 42.0123, GNorm = 2.0483, lr_0 = 9.5506e-04
Loss = 5.7368e-01, PNorm = 42.0379, GNorm = 1.1974, lr_0 = 9.5440e-04
Loss = 5.6406e-01, PNorm = 42.0631, GNorm = 1.2547, lr_0 = 9.5375e-04
Loss = 5.6426e-01, PNorm = 42.0739, GNorm = 1.4449, lr_0 = 9.5310e-04
Loss = 4.9534e-01, PNorm = 42.0883, GNorm = 2.1352, lr_0 = 9.5244e-04
Loss = 5.1257e-01, PNorm = 42.0981, GNorm = 1.6614, lr_0 = 9.5179e-04
Loss = 5.8661e-01, PNorm = 42.1204, GNorm = 2.3905, lr_0 = 9.5114e-04
Loss = 5.1877e-01, PNorm = 42.1498, GNorm = 4.4772, lr_0 = 9.5049e-04
Loss = 5.2896e-01, PNorm = 42.1609, GNorm = 3.4460, lr_0 = 9.4984e-04
Loss = 5.6841e-01, PNorm = 42.1749, GNorm = 1.4993, lr_0 = 9.4919e-04
Loss = 5.5831e-01, PNorm = 42.1979, GNorm = 1.9296, lr_0 = 9.4854e-04
Loss = 4.9162e-01, PNorm = 42.2215, GNorm = 1.3383, lr_0 = 9.4789e-04
Loss = 5.2124e-01, PNorm = 42.2458, GNorm = 0.9641, lr_0 = 9.4724e-04
Loss = 6.1372e-01, PNorm = 42.2601, GNorm = 3.5947, lr_0 = 9.4659e-04
Loss = 4.5857e-01, PNorm = 42.2842, GNorm = 1.1005, lr_0 = 9.4594e-04
Loss = 5.0335e-01, PNorm = 42.3125, GNorm = 1.3827, lr_0 = 9.4529e-04
Loss = 6.3023e-01, PNorm = 42.3263, GNorm = 1.2529, lr_0 = 9.4464e-04
Loss = 5.3851e-01, PNorm = 42.3376, GNorm = 3.3102, lr_0 = 9.4400e-04
Loss = 5.5958e-01, PNorm = 42.3524, GNorm = 1.3496, lr_0 = 9.4335e-04
Loss = 6.0893e-01, PNorm = 42.3746, GNorm = 3.2935, lr_0 = 9.4270e-04
Loss = 5.2133e-01, PNorm = 42.4029, GNorm = 3.0729, lr_0 = 9.4206e-04
Loss = 5.1700e-01, PNorm = 42.4336, GNorm = 1.1116, lr_0 = 9.4141e-04
Loss = 5.1730e-01, PNorm = 42.4560, GNorm = 1.6602, lr_0 = 9.4077e-04
Loss = 5.7739e-01, PNorm = 42.4833, GNorm = 3.5199, lr_0 = 9.4012e-04
Loss = 5.6152e-01, PNorm = 42.4970, GNorm = 1.9629, lr_0 = 9.3948e-04
Loss = 6.0860e-01, PNorm = 42.5145, GNorm = 1.3229, lr_0 = 9.3884e-04
Loss = 5.3233e-01, PNorm = 42.5421, GNorm = 2.9100, lr_0 = 9.3819e-04
Loss = 5.2331e-01, PNorm = 42.5586, GNorm = 3.9711, lr_0 = 9.3755e-04
Loss = 5.5715e-01, PNorm = 42.5794, GNorm = 1.5222, lr_0 = 9.3691e-04
Loss = 5.3559e-01, PNorm = 42.6066, GNorm = 1.6210, lr_0 = 9.3627e-04
Loss = 5.0758e-01, PNorm = 42.6335, GNorm = 3.4390, lr_0 = 9.3562e-04
Loss = 5.8744e-01, PNorm = 42.6401, GNorm = 2.4799, lr_0 = 9.3498e-04
Loss = 5.0195e-01, PNorm = 42.6527, GNorm = 3.8274, lr_0 = 9.3434e-04
Loss = 5.1512e-01, PNorm = 42.6780, GNorm = 1.1945, lr_0 = 9.3370e-04
Loss = 5.5770e-01, PNorm = 42.7091, GNorm = 1.2992, lr_0 = 9.3306e-04
Loss = 5.0670e-01, PNorm = 42.7217, GNorm = 1.2035, lr_0 = 9.3242e-04
Loss = 5.1385e-01, PNorm = 42.7504, GNorm = 2.0079, lr_0 = 9.3178e-04
Loss = 5.3335e-01, PNorm = 42.7727, GNorm = 1.6209, lr_0 = 9.3115e-04
Loss = 5.3557e-01, PNorm = 42.7942, GNorm = 4.5521, lr_0 = 9.3051e-04
Loss = 5.4796e-01, PNorm = 42.8162, GNorm = 0.7751, lr_0 = 9.2987e-04
Loss = 5.6255e-01, PNorm = 42.8417, GNorm = 2.8060, lr_0 = 9.2923e-04
Loss = 4.8800e-01, PNorm = 42.8600, GNorm = 1.2962, lr_0 = 9.2860e-04
Loss = 5.0423e-01, PNorm = 42.8824, GNorm = 1.9811, lr_0 = 9.2796e-04
Loss = 5.1756e-01, PNorm = 42.8925, GNorm = 2.7810, lr_0 = 9.2733e-04
Loss = 6.2815e-01, PNorm = 42.9077, GNorm = 4.1280, lr_0 = 9.2669e-04
Loss = 5.3707e-01, PNorm = 42.9244, GNorm = 2.5358, lr_0 = 9.2606e-04
Loss = 6.2025e-01, PNorm = 42.9454, GNorm = 2.5303, lr_0 = 9.2542e-04
Loss = 5.3579e-01, PNorm = 42.9614, GNorm = 2.2016, lr_0 = 9.2479e-04
Loss = 4.9956e-01, PNorm = 42.9816, GNorm = 1.1016, lr_0 = 9.2415e-04
Loss = 5.6212e-01, PNorm = 43.0049, GNorm = 1.6359, lr_0 = 9.2352e-04
Loss = 5.1614e-01, PNorm = 43.0098, GNorm = 1.4534, lr_0 = 9.2289e-04
Loss = 5.1838e-01, PNorm = 43.0142, GNorm = 1.6645, lr_0 = 9.2226e-04
Loss = 5.7225e-01, PNorm = 43.0328, GNorm = 1.2089, lr_0 = 9.2162e-04
Loss = 6.0671e-01, PNorm = 43.0540, GNorm = 1.8709, lr_0 = 9.2099e-04
Validation mae = 0.127377
Epoch 3
Loss = 6.0899e-01, PNorm = 43.0864, GNorm = 2.7662, lr_0 = 9.2036e-04
Loss = 5.1785e-01, PNorm = 43.1119, GNorm = 0.9788, lr_0 = 9.1973e-04
Loss = 5.5332e-01, PNorm = 43.1485, GNorm = 4.6285, lr_0 = 9.1910e-04
Loss = 4.4750e-01, PNorm = 43.1696, GNorm = 1.5159, lr_0 = 9.1847e-04
Loss = 5.2750e-01, PNorm = 43.1927, GNorm = 1.1547, lr_0 = 9.1784e-04
Loss = 4.8146e-01, PNorm = 43.2120, GNorm = 1.0911, lr_0 = 9.1721e-04
Loss = 4.9963e-01, PNorm = 43.2220, GNorm = 1.2143, lr_0 = 9.1658e-04
Loss = 5.6067e-01, PNorm = 43.2435, GNorm = 2.4253, lr_0 = 9.1596e-04
Loss = 4.4895e-01, PNorm = 43.2681, GNorm = 1.1345, lr_0 = 9.1533e-04
Loss = 5.3044e-01, PNorm = 43.2828, GNorm = 3.5196, lr_0 = 9.1470e-04
Loss = 4.9261e-01, PNorm = 43.3035, GNorm = 0.9829, lr_0 = 9.1408e-04
Loss = 5.3344e-01, PNorm = 43.3311, GNorm = 2.1156, lr_0 = 9.1345e-04
Loss = 4.8247e-01, PNorm = 43.3460, GNorm = 2.6117, lr_0 = 9.1282e-04
Loss = 4.8929e-01, PNorm = 43.3622, GNorm = 1.2614, lr_0 = 9.1220e-04
Loss = 4.4546e-01, PNorm = 43.3748, GNorm = 1.0570, lr_0 = 9.1157e-04
Loss = 5.5735e-01, PNorm = 43.3950, GNorm = 1.1452, lr_0 = 9.1095e-04
Loss = 4.2261e-01, PNorm = 43.4148, GNorm = 1.9263, lr_0 = 9.1032e-04
Loss = 5.5822e-01, PNorm = 43.4318, GNorm = 1.3658, lr_0 = 9.0970e-04
Loss = 4.7739e-01, PNorm = 43.4550, GNorm = 5.3020, lr_0 = 9.0908e-04
Loss = 5.5134e-01, PNorm = 43.4791, GNorm = 1.1662, lr_0 = 9.0846e-04
Loss = 5.0873e-01, PNorm = 43.4982, GNorm = 1.6792, lr_0 = 9.0783e-04
Loss = 5.1590e-01, PNorm = 43.5163, GNorm = 1.5362, lr_0 = 9.0721e-04
Loss = 5.4696e-01, PNorm = 43.5422, GNorm = 2.4568, lr_0 = 9.0659e-04
Loss = 4.7164e-01, PNorm = 43.5696, GNorm = 1.3530, lr_0 = 9.0597e-04
Loss = 4.8758e-01, PNorm = 43.5953, GNorm = 2.0650, lr_0 = 9.0535e-04
Loss = 4.8078e-01, PNorm = 43.6161, GNorm = 2.2445, lr_0 = 9.0473e-04
Loss = 5.4048e-01, PNorm = 43.6260, GNorm = 1.0980, lr_0 = 9.0411e-04
Loss = 5.0428e-01, PNorm = 43.6483, GNorm = 1.2537, lr_0 = 9.0349e-04
Loss = 5.2042e-01, PNorm = 43.6715, GNorm = 2.4460, lr_0 = 9.0287e-04
Loss = 5.1803e-01, PNorm = 43.6850, GNorm = 2.0870, lr_0 = 9.0225e-04
Loss = 5.2697e-01, PNorm = 43.7049, GNorm = 1.4100, lr_0 = 9.0163e-04
Loss = 5.6576e-01, PNorm = 43.7259, GNorm = 1.9060, lr_0 = 9.0102e-04
Loss = 5.7995e-01, PNorm = 43.7483, GNorm = 2.4743, lr_0 = 9.0040e-04
Loss = 4.5503e-01, PNorm = 43.7655, GNorm = 2.1854, lr_0 = 8.9978e-04
Loss = 5.6023e-01, PNorm = 43.7791, GNorm = 2.6532, lr_0 = 8.9916e-04
Loss = 5.4239e-01, PNorm = 43.7979, GNorm = 1.2343, lr_0 = 8.9855e-04
Loss = 4.9084e-01, PNorm = 43.8242, GNorm = 1.5522, lr_0 = 8.9793e-04
Loss = 5.5142e-01, PNorm = 43.8412, GNorm = 1.3835, lr_0 = 8.9732e-04
Loss = 4.6666e-01, PNorm = 43.8574, GNorm = 2.5939, lr_0 = 8.9670e-04
Loss = 4.6366e-01, PNorm = 43.8763, GNorm = 1.7557, lr_0 = 8.9609e-04
Loss = 6.2371e-01, PNorm = 43.8977, GNorm = 2.6232, lr_0 = 8.9548e-04
Loss = 4.9792e-01, PNorm = 43.9217, GNorm = 2.2407, lr_0 = 8.9486e-04
Loss = 5.7878e-01, PNorm = 43.9385, GNorm = 2.6339, lr_0 = 8.9425e-04
Loss = 4.8437e-01, PNorm = 43.9623, GNorm = 0.7685, lr_0 = 8.9364e-04
Loss = 4.8442e-01, PNorm = 43.9943, GNorm = 1.5045, lr_0 = 8.9302e-04
Loss = 5.0208e-01, PNorm = 44.0139, GNorm = 1.6993, lr_0 = 8.9241e-04
Loss = 5.0044e-01, PNorm = 44.0270, GNorm = 2.4054, lr_0 = 8.9180e-04
Loss = 4.5743e-01, PNorm = 44.0475, GNorm = 1.6428, lr_0 = 8.9119e-04
Loss = 5.3025e-01, PNorm = 44.0642, GNorm = 1.2063, lr_0 = 8.9058e-04
Loss = 4.9023e-01, PNorm = 44.0792, GNorm = 1.2337, lr_0 = 8.8997e-04
Loss = 4.7546e-01, PNorm = 44.0963, GNorm = 1.8768, lr_0 = 8.8936e-04
Loss = 5.5777e-01, PNorm = 44.1090, GNorm = 1.8210, lr_0 = 8.8875e-04
Loss = 4.3010e-01, PNorm = 44.1267, GNorm = 1.8313, lr_0 = 8.8814e-04
Loss = 5.4930e-01, PNorm = 44.1488, GNorm = 1.3513, lr_0 = 8.8753e-04
Loss = 5.2867e-01, PNorm = 44.1701, GNorm = 1.4487, lr_0 = 8.8693e-04
Loss = 5.6396e-01, PNorm = 44.1924, GNorm = 1.2646, lr_0 = 8.8632e-04
Loss = 4.5315e-01, PNorm = 44.2130, GNorm = 1.0019, lr_0 = 8.8571e-04
Loss = 5.9243e-01, PNorm = 44.2293, GNorm = 1.5613, lr_0 = 8.8510e-04
Loss = 5.0062e-01, PNorm = 44.2519, GNorm = 1.6158, lr_0 = 8.8450e-04
Loss = 4.4832e-01, PNorm = 44.2703, GNorm = 1.4455, lr_0 = 8.8389e-04
Loss = 5.1817e-01, PNorm = 44.2836, GNorm = 1.2488, lr_0 = 8.8329e-04
Loss = 5.1898e-01, PNorm = 44.2992, GNorm = 1.4218, lr_0 = 8.8268e-04
Loss = 5.3457e-01, PNorm = 44.3044, GNorm = 1.4224, lr_0 = 8.8208e-04
Loss = 4.6953e-01, PNorm = 44.3281, GNorm = 2.8600, lr_0 = 8.8147e-04
Loss = 5.3614e-01, PNorm = 44.3447, GNorm = 1.5384, lr_0 = 8.8087e-04
Loss = 5.6236e-01, PNorm = 44.3636, GNorm = 1.5714, lr_0 = 8.8026e-04
Loss = 5.0044e-01, PNorm = 44.3821, GNorm = 1.9421, lr_0 = 8.7966e-04
Loss = 5.3809e-01, PNorm = 44.4069, GNorm = 1.2487, lr_0 = 8.7906e-04
Loss = 4.8943e-01, PNorm = 44.4307, GNorm = 2.7022, lr_0 = 8.7846e-04
Loss = 4.5650e-01, PNorm = 44.4482, GNorm = 1.4045, lr_0 = 8.7785e-04
Loss = 5.3288e-01, PNorm = 44.4631, GNorm = 1.2632, lr_0 = 8.7725e-04
Loss = 6.1187e-01, PNorm = 44.4885, GNorm = 1.2462, lr_0 = 8.7665e-04
Loss = 4.9831e-01, PNorm = 44.5071, GNorm = 1.3272, lr_0 = 8.7605e-04
Loss = 4.9391e-01, PNorm = 44.5355, GNorm = 1.9602, lr_0 = 8.7545e-04
Loss = 4.8776e-01, PNorm = 44.5535, GNorm = 2.7409, lr_0 = 8.7485e-04
Loss = 5.0770e-01, PNorm = 44.5667, GNorm = 1.4248, lr_0 = 8.7425e-04
Loss = 4.9731e-01, PNorm = 44.5770, GNorm = 3.5501, lr_0 = 8.7365e-04
Loss = 5.1069e-01, PNorm = 44.6003, GNorm = 2.3344, lr_0 = 8.7306e-04
Loss = 4.8399e-01, PNorm = 44.6168, GNorm = 3.1962, lr_0 = 8.7246e-04
Loss = 5.2467e-01, PNorm = 44.6308, GNorm = 1.0828, lr_0 = 8.7186e-04
Loss = 4.4738e-01, PNorm = 44.6533, GNorm = 1.0130, lr_0 = 8.7126e-04
Loss = 6.0170e-01, PNorm = 44.6687, GNorm = 1.2050, lr_0 = 8.7067e-04
Loss = 5.3011e-01, PNorm = 44.6843, GNorm = 2.5446, lr_0 = 8.7007e-04
Loss = 4.6740e-01, PNorm = 44.7029, GNorm = 1.6184, lr_0 = 8.6947e-04
Loss = 5.3476e-01, PNorm = 44.7232, GNorm = 3.6020, lr_0 = 8.6888e-04
Loss = 5.4623e-01, PNorm = 44.7350, GNorm = 1.4768, lr_0 = 8.6828e-04
Loss = 5.9643e-01, PNorm = 44.7617, GNorm = 1.6327, lr_0 = 8.6769e-04
Loss = 5.2220e-01, PNorm = 44.7866, GNorm = 1.6290, lr_0 = 8.6709e-04
Loss = 4.9679e-01, PNorm = 44.8154, GNorm = 1.1934, lr_0 = 8.6650e-04
Loss = 5.6478e-01, PNorm = 44.8460, GNorm = 1.0706, lr_0 = 8.6590e-04
Loss = 5.2153e-01, PNorm = 44.8698, GNorm = 1.0118, lr_0 = 8.6531e-04
Loss = 4.9164e-01, PNorm = 44.8909, GNorm = 1.2605, lr_0 = 8.6472e-04
Loss = 5.7157e-01, PNorm = 44.9128, GNorm = 1.5348, lr_0 = 8.6413e-04
Loss = 5.5700e-01, PNorm = 44.9375, GNorm = 1.1142, lr_0 = 8.6353e-04
Loss = 5.3152e-01, PNorm = 44.9583, GNorm = 0.9871, lr_0 = 8.6294e-04
Loss = 4.5216e-01, PNorm = 44.9692, GNorm = 1.1223, lr_0 = 8.6235e-04
Loss = 4.6842e-01, PNorm = 44.9886, GNorm = 1.3791, lr_0 = 8.6176e-04
Loss = 5.1079e-01, PNorm = 45.0104, GNorm = 4.5088, lr_0 = 8.6117e-04
Loss = 5.3472e-01, PNorm = 45.0215, GNorm = 2.8659, lr_0 = 8.6058e-04
Loss = 4.8360e-01, PNorm = 45.0398, GNorm = 1.8485, lr_0 = 8.5999e-04
Loss = 4.4766e-01, PNorm = 45.0634, GNorm = 1.2096, lr_0 = 8.5940e-04
Loss = 4.7735e-01, PNorm = 45.0773, GNorm = 1.7593, lr_0 = 8.5881e-04
Loss = 4.8560e-01, PNorm = 45.0916, GNorm = 1.5658, lr_0 = 8.5823e-04
Loss = 4.5935e-01, PNorm = 45.1055, GNorm = 2.1200, lr_0 = 8.5764e-04
Loss = 5.1165e-01, PNorm = 45.1229, GNorm = 1.1576, lr_0 = 8.5705e-04
Loss = 5.2502e-01, PNorm = 45.1396, GNorm = 1.1555, lr_0 = 8.5646e-04
Loss = 5.0399e-01, PNorm = 45.1443, GNorm = 1.1528, lr_0 = 8.5588e-04
Loss = 4.7143e-01, PNorm = 45.1512, GNorm = 1.7619, lr_0 = 8.5529e-04
Loss = 5.1329e-01, PNorm = 45.1674, GNorm = 1.2754, lr_0 = 8.5470e-04
Loss = 4.6468e-01, PNorm = 45.1891, GNorm = 1.2620, lr_0 = 8.5412e-04
Loss = 4.7043e-01, PNorm = 45.2044, GNorm = 1.3329, lr_0 = 8.5353e-04
Loss = 4.9940e-01, PNorm = 45.2129, GNorm = 1.0828, lr_0 = 8.5295e-04
Loss = 5.2972e-01, PNorm = 45.2346, GNorm = 1.3461, lr_0 = 8.5236e-04
Loss = 4.3947e-01, PNorm = 45.2599, GNorm = 1.2337, lr_0 = 8.5178e-04
Loss = 5.3398e-01, PNorm = 45.2811, GNorm = 1.7858, lr_0 = 8.5120e-04
Loss = 5.4883e-01, PNorm = 45.3184, GNorm = 1.9662, lr_0 = 8.5061e-04
Loss = 5.3745e-01, PNorm = 45.3515, GNorm = 1.7411, lr_0 = 8.5003e-04
Loss = 4.8721e-01, PNorm = 45.3797, GNorm = 2.4626, lr_0 = 8.4945e-04
Loss = 4.7261e-01, PNorm = 45.3935, GNorm = 1.0136, lr_0 = 8.4887e-04
Loss = 5.4744e-01, PNorm = 45.4093, GNorm = 2.4801, lr_0 = 8.4828e-04
Validation mae = 0.122350
Epoch 4
Loss = 5.2728e-01, PNorm = 45.4228, GNorm = 1.2638, lr_0 = 8.4770e-04
Loss = 5.0018e-01, PNorm = 45.4503, GNorm = 3.0666, lr_0 = 8.4712e-04
Loss = 4.7581e-01, PNorm = 45.4764, GNorm = 3.0226, lr_0 = 8.4654e-04
Loss = 5.5986e-01, PNorm = 45.5021, GNorm = 1.4003, lr_0 = 8.4596e-04
Loss = 4.9143e-01, PNorm = 45.5248, GNorm = 0.8664, lr_0 = 8.4538e-04
Loss = 5.3786e-01, PNorm = 45.5434, GNorm = 0.9263, lr_0 = 8.4480e-04
Loss = 5.1127e-01, PNorm = 45.5579, GNorm = 2.7483, lr_0 = 8.4423e-04
Loss = 5.0173e-01, PNorm = 45.5852, GNorm = 1.5637, lr_0 = 8.4365e-04
Loss = 5.1416e-01, PNorm = 45.6082, GNorm = 1.2979, lr_0 = 8.4307e-04
Loss = 4.5554e-01, PNorm = 45.6312, GNorm = 0.9797, lr_0 = 8.4249e-04
Loss = 4.5461e-01, PNorm = 45.6498, GNorm = 1.1828, lr_0 = 8.4191e-04
Loss = 5.1881e-01, PNorm = 45.6654, GNorm = 0.9946, lr_0 = 8.4134e-04
Loss = 4.7750e-01, PNorm = 45.6852, GNorm = 1.9958, lr_0 = 8.4076e-04
Loss = 4.8584e-01, PNorm = 45.7056, GNorm = 1.0741, lr_0 = 8.4019e-04
Loss = 4.5198e-01, PNorm = 45.7254, GNorm = 1.9343, lr_0 = 8.3961e-04
Loss = 4.9689e-01, PNorm = 45.7376, GNorm = 1.5723, lr_0 = 8.3903e-04
Loss = 4.6332e-01, PNorm = 45.7561, GNorm = 3.2506, lr_0 = 8.3846e-04
Loss = 5.2403e-01, PNorm = 45.7688, GNorm = 1.8238, lr_0 = 8.3789e-04
Loss = 5.0380e-01, PNorm = 45.7814, GNorm = 1.2323, lr_0 = 8.3731e-04
Loss = 5.3617e-01, PNorm = 45.7972, GNorm = 1.0499, lr_0 = 8.3674e-04
Loss = 4.1596e-01, PNorm = 45.8170, GNorm = 1.0242, lr_0 = 8.3616e-04
Loss = 4.8405e-01, PNorm = 45.8341, GNorm = 1.3854, lr_0 = 8.3559e-04
Loss = 5.1652e-01, PNorm = 45.8586, GNorm = 1.6144, lr_0 = 8.3502e-04
Loss = 4.9625e-01, PNorm = 45.8761, GNorm = 1.5503, lr_0 = 8.3445e-04
Loss = 4.4334e-01, PNorm = 45.8955, GNorm = 1.6101, lr_0 = 8.3388e-04
Loss = 4.8156e-01, PNorm = 45.9182, GNorm = 2.2127, lr_0 = 8.3330e-04
Loss = 5.0196e-01, PNorm = 45.9364, GNorm = 0.8040, lr_0 = 8.3273e-04
Loss = 4.6417e-01, PNorm = 45.9587, GNorm = 1.2330, lr_0 = 8.3216e-04
Loss = 4.6862e-01, PNorm = 45.9858, GNorm = 1.3980, lr_0 = 8.3159e-04
Loss = 4.7907e-01, PNorm = 46.0005, GNorm = 1.2309, lr_0 = 8.3102e-04
Loss = 5.4584e-01, PNorm = 46.0169, GNorm = 2.1317, lr_0 = 8.3045e-04
Loss = 5.2180e-01, PNorm = 46.0398, GNorm = 2.7529, lr_0 = 8.2988e-04
Loss = 5.3628e-01, PNorm = 46.0611, GNorm = 2.1338, lr_0 = 8.2932e-04
Loss = 4.6892e-01, PNorm = 46.0832, GNorm = 2.8791, lr_0 = 8.2875e-04
Loss = 4.5434e-01, PNorm = 46.0989, GNorm = 1.1820, lr_0 = 8.2818e-04
Loss = 4.3131e-01, PNorm = 46.1181, GNorm = 1.1824, lr_0 = 8.2761e-04
Loss = 4.8515e-01, PNorm = 46.1410, GNorm = 2.1045, lr_0 = 8.2705e-04
Loss = 5.8614e-01, PNorm = 46.1598, GNorm = 1.8948, lr_0 = 8.2648e-04
Loss = 4.7569e-01, PNorm = 46.1795, GNorm = 1.7638, lr_0 = 8.2591e-04
Loss = 5.2374e-01, PNorm = 46.2006, GNorm = 1.4838, lr_0 = 8.2535e-04
Loss = 5.0389e-01, PNorm = 46.2126, GNorm = 2.1540, lr_0 = 8.2478e-04
Loss = 5.0643e-01, PNorm = 46.2272, GNorm = 1.6288, lr_0 = 8.2422e-04
Loss = 4.9657e-01, PNorm = 46.2551, GNorm = 1.5359, lr_0 = 8.2365e-04
Loss = 4.2598e-01, PNorm = 46.2826, GNorm = 0.9758, lr_0 = 8.2309e-04
Loss = 5.1330e-01, PNorm = 46.2919, GNorm = 1.3160, lr_0 = 8.2252e-04
Loss = 4.5122e-01, PNorm = 46.3086, GNorm = 2.0315, lr_0 = 8.2196e-04
Loss = 4.6663e-01, PNorm = 46.3255, GNorm = 1.1812, lr_0 = 8.2140e-04
Loss = 4.8290e-01, PNorm = 46.3433, GNorm = 2.0950, lr_0 = 8.2084e-04
Loss = 5.1074e-01, PNorm = 46.3569, GNorm = 1.6658, lr_0 = 8.2027e-04
Loss = 4.8844e-01, PNorm = 46.3671, GNorm = 1.7687, lr_0 = 8.1971e-04
Loss = 5.0177e-01, PNorm = 46.3870, GNorm = 1.0441, lr_0 = 8.1915e-04
Loss = 5.9973e-01, PNorm = 46.4138, GNorm = 1.6496, lr_0 = 8.1859e-04
Loss = 5.0265e-01, PNorm = 46.4368, GNorm = 1.1129, lr_0 = 8.1803e-04
Loss = 4.8847e-01, PNorm = 46.4619, GNorm = 1.8247, lr_0 = 8.1747e-04
Loss = 4.3687e-01, PNorm = 46.4793, GNorm = 1.2266, lr_0 = 8.1691e-04
Loss = 4.9846e-01, PNorm = 46.5061, GNorm = 2.3968, lr_0 = 8.1635e-04
Loss = 4.9085e-01, PNorm = 46.5213, GNorm = 1.3981, lr_0 = 8.1579e-04
Loss = 4.3233e-01, PNorm = 46.5423, GNorm = 1.0008, lr_0 = 8.1523e-04
Loss = 4.4775e-01, PNorm = 46.5616, GNorm = 1.5764, lr_0 = 8.1467e-04
Loss = 4.8173e-01, PNorm = 46.5702, GNorm = 2.0113, lr_0 = 8.1411e-04
Loss = 4.7384e-01, PNorm = 46.5862, GNorm = 0.9391, lr_0 = 8.1355e-04
Loss = 5.3853e-01, PNorm = 46.6092, GNorm = 1.4460, lr_0 = 8.1300e-04
Loss = 3.8437e-01, PNorm = 46.6248, GNorm = 1.1804, lr_0 = 8.1244e-04
Loss = 4.8312e-01, PNorm = 46.6402, GNorm = 1.3712, lr_0 = 8.1188e-04
Loss = 4.7174e-01, PNorm = 46.6532, GNorm = 2.2017, lr_0 = 8.1133e-04
Loss = 4.6502e-01, PNorm = 46.6722, GNorm = 1.6337, lr_0 = 8.1077e-04
Loss = 4.8979e-01, PNorm = 46.6900, GNorm = 1.6131, lr_0 = 8.1022e-04
Loss = 4.1732e-01, PNorm = 46.7080, GNorm = 0.8664, lr_0 = 8.0966e-04
Loss = 5.3988e-01, PNorm = 46.7281, GNorm = 1.3188, lr_0 = 8.0911e-04
Loss = 4.7787e-01, PNorm = 46.7400, GNorm = 1.2099, lr_0 = 8.0855e-04
Loss = 5.4242e-01, PNorm = 46.7450, GNorm = 2.2055, lr_0 = 8.0800e-04
Loss = 4.5950e-01, PNorm = 46.7634, GNorm = 1.3983, lr_0 = 8.0745e-04
Loss = 4.4001e-01, PNorm = 46.7837, GNorm = 1.1750, lr_0 = 8.0689e-04
Loss = 5.0563e-01, PNorm = 46.8081, GNorm = 1.5052, lr_0 = 8.0634e-04
Loss = 5.4973e-01, PNorm = 46.8301, GNorm = 1.1290, lr_0 = 8.0579e-04
Loss = 4.5201e-01, PNorm = 46.8560, GNorm = 1.1125, lr_0 = 8.0523e-04
Loss = 4.5032e-01, PNorm = 46.8765, GNorm = 1.0951, lr_0 = 8.0468e-04
Loss = 5.2509e-01, PNorm = 46.8893, GNorm = 1.2739, lr_0 = 8.0413e-04
Loss = 4.9260e-01, PNorm = 46.8975, GNorm = 1.7278, lr_0 = 8.0358e-04
Loss = 6.1685e-01, PNorm = 46.9150, GNorm = 1.2504, lr_0 = 8.0303e-04
Loss = 5.2802e-01, PNorm = 46.9323, GNorm = 1.5579, lr_0 = 8.0248e-04
Loss = 5.5310e-01, PNorm = 46.9458, GNorm = 1.5928, lr_0 = 8.0193e-04
Loss = 4.6712e-01, PNorm = 46.9600, GNorm = 2.4691, lr_0 = 8.0138e-04
Loss = 4.7415e-01, PNorm = 46.9853, GNorm = 0.9681, lr_0 = 8.0083e-04
Loss = 5.9364e-01, PNorm = 47.0057, GNorm = 1.7593, lr_0 = 8.0028e-04
Loss = 4.9593e-01, PNorm = 47.0233, GNorm = 1.5217, lr_0 = 7.9974e-04
Loss = 5.0794e-01, PNorm = 47.0361, GNorm = 1.6301, lr_0 = 7.9919e-04
Loss = 5.5439e-01, PNorm = 47.0571, GNorm = 1.1536, lr_0 = 7.9864e-04
Loss = 5.5394e-01, PNorm = 47.0790, GNorm = 2.2339, lr_0 = 7.9809e-04
Loss = 5.2316e-01, PNorm = 47.1003, GNorm = 1.6853, lr_0 = 7.9755e-04
Loss = 4.1954e-01, PNorm = 47.1166, GNorm = 1.4047, lr_0 = 7.9700e-04
Loss = 5.0125e-01, PNorm = 47.1401, GNorm = 1.1812, lr_0 = 7.9645e-04
Loss = 4.7815e-01, PNorm = 47.1532, GNorm = 0.8788, lr_0 = 7.9591e-04
Loss = 4.9592e-01, PNorm = 47.1700, GNorm = 1.7950, lr_0 = 7.9536e-04
Loss = 4.6746e-01, PNorm = 47.1955, GNorm = 1.4624, lr_0 = 7.9482e-04
Loss = 5.0835e-01, PNorm = 47.2193, GNorm = 1.6473, lr_0 = 7.9427e-04
Loss = 4.5601e-01, PNorm = 47.2389, GNorm = 2.5725, lr_0 = 7.9373e-04
Loss = 3.8666e-01, PNorm = 47.2528, GNorm = 1.2487, lr_0 = 7.9319e-04
Loss = 4.2402e-01, PNorm = 47.2689, GNorm = 1.1133, lr_0 = 7.9264e-04
Loss = 4.5167e-01, PNorm = 47.2812, GNorm = 1.7122, lr_0 = 7.9210e-04
Loss = 4.0722e-01, PNorm = 47.2966, GNorm = 0.9893, lr_0 = 7.9156e-04
Loss = 5.0494e-01, PNorm = 47.3066, GNorm = 1.4626, lr_0 = 7.9101e-04
Loss = 4.7404e-01, PNorm = 47.3215, GNorm = 1.7590, lr_0 = 7.9047e-04
Loss = 4.3918e-01, PNorm = 47.3399, GNorm = 1.1688, lr_0 = 7.8993e-04
Loss = 5.2540e-01, PNorm = 47.3539, GNorm = 1.1731, lr_0 = 7.8939e-04
Loss = 4.5086e-01, PNorm = 47.3660, GNorm = 1.4071, lr_0 = 7.8885e-04
Loss = 4.9851e-01, PNorm = 47.3892, GNorm = 2.3743, lr_0 = 7.8831e-04
Loss = 4.2679e-01, PNorm = 47.4128, GNorm = 1.3273, lr_0 = 7.8777e-04
Loss = 5.5256e-01, PNorm = 47.4371, GNorm = 1.0708, lr_0 = 7.8723e-04
Loss = 4.8269e-01, PNorm = 47.4632, GNorm = 1.3060, lr_0 = 7.8669e-04
Loss = 4.4297e-01, PNorm = 47.4890, GNorm = 1.2474, lr_0 = 7.8615e-04
Loss = 4.6251e-01, PNorm = 47.5120, GNorm = 1.4500, lr_0 = 7.8561e-04
Loss = 4.1846e-01, PNorm = 47.5268, GNorm = 2.0251, lr_0 = 7.8507e-04
Loss = 4.5882e-01, PNorm = 47.5483, GNorm = 1.1224, lr_0 = 7.8454e-04
Loss = 4.2094e-01, PNorm = 47.5607, GNorm = 1.6789, lr_0 = 7.8400e-04
Loss = 4.9181e-01, PNorm = 47.5715, GNorm = 1.3017, lr_0 = 7.8346e-04
Loss = 4.5177e-01, PNorm = 47.5873, GNorm = 1.1745, lr_0 = 7.8293e-04
Loss = 4.8474e-01, PNorm = 47.6004, GNorm = 1.5562, lr_0 = 7.8239e-04
Loss = 5.0121e-01, PNorm = 47.6153, GNorm = 1.3772, lr_0 = 7.8185e-04
Loss = 4.0958e-01, PNorm = 47.6330, GNorm = 1.7590, lr_0 = 7.8132e-04
Validation mae = 0.122871
Epoch 5
Loss = 4.2745e-01, PNorm = 47.6540, GNorm = 1.3465, lr_0 = 7.8078e-04
Loss = 4.4231e-01, PNorm = 47.6665, GNorm = 1.4142, lr_0 = 7.8025e-04
Loss = 4.6728e-01, PNorm = 47.6781, GNorm = 1.3141, lr_0 = 7.7971e-04
Loss = 5.5408e-01, PNorm = 47.6914, GNorm = 1.6588, lr_0 = 7.7918e-04
Loss = 4.4737e-01, PNorm = 47.7161, GNorm = 1.7193, lr_0 = 7.7864e-04
Loss = 4.7044e-01, PNorm = 47.7428, GNorm = 1.4895, lr_0 = 7.7811e-04
Loss = 4.8980e-01, PNorm = 47.7668, GNorm = 1.8664, lr_0 = 7.7758e-04
Loss = 4.4913e-01, PNorm = 47.7879, GNorm = 1.4785, lr_0 = 7.7705e-04
Loss = 3.8894e-01, PNorm = 47.8013, GNorm = 1.3188, lr_0 = 7.7651e-04
Loss = 5.3760e-01, PNorm = 47.8152, GNorm = 1.3585, lr_0 = 7.7598e-04
Loss = 4.6731e-01, PNorm = 47.8303, GNorm = 2.1049, lr_0 = 7.7545e-04
Loss = 5.1423e-01, PNorm = 47.8461, GNorm = 1.3954, lr_0 = 7.7492e-04
Loss = 5.0906e-01, PNorm = 47.8636, GNorm = 1.1093, lr_0 = 7.7439e-04
Loss = 5.3002e-01, PNorm = 47.8759, GNorm = 1.4585, lr_0 = 7.7386e-04
Loss = 4.2855e-01, PNorm = 47.8892, GNorm = 1.2571, lr_0 = 7.7333e-04
Loss = 4.5995e-01, PNorm = 47.9034, GNorm = 1.2062, lr_0 = 7.7280e-04
Loss = 4.7209e-01, PNorm = 47.9125, GNorm = 1.0170, lr_0 = 7.7227e-04
Loss = 5.2980e-01, PNorm = 47.9381, GNorm = 1.8102, lr_0 = 7.7174e-04
Loss = 5.2259e-01, PNorm = 47.9662, GNorm = 1.5078, lr_0 = 7.7121e-04
Loss = 4.7283e-01, PNorm = 47.9761, GNorm = 1.7857, lr_0 = 7.7068e-04
Loss = 5.0507e-01, PNorm = 47.9963, GNorm = 1.0307, lr_0 = 7.7015e-04
Loss = 4.7659e-01, PNorm = 48.0164, GNorm = 1.1756, lr_0 = 7.6963e-04
Loss = 4.5414e-01, PNorm = 48.0337, GNorm = 2.8047, lr_0 = 7.6910e-04
Loss = 5.5832e-01, PNorm = 48.0598, GNorm = 1.7088, lr_0 = 7.6857e-04
Loss = 4.6932e-01, PNorm = 48.0821, GNorm = 1.5870, lr_0 = 7.6805e-04
Loss = 4.8179e-01, PNorm = 48.1015, GNorm = 1.5015, lr_0 = 7.6752e-04
Loss = 4.3339e-01, PNorm = 48.1182, GNorm = 1.2825, lr_0 = 7.6699e-04
Loss = 4.7074e-01, PNorm = 48.1332, GNorm = 0.9879, lr_0 = 7.6647e-04
Loss = 3.9274e-01, PNorm = 48.1436, GNorm = 1.1649, lr_0 = 7.6594e-04
Loss = 4.9854e-01, PNorm = 48.1611, GNorm = 1.2312, lr_0 = 7.6542e-04
Loss = 5.1962e-01, PNorm = 48.1799, GNorm = 2.0181, lr_0 = 7.6489e-04
Loss = 5.1548e-01, PNorm = 48.2005, GNorm = 2.3399, lr_0 = 7.6437e-04
Loss = 5.0845e-01, PNorm = 48.2108, GNorm = 1.2746, lr_0 = 7.6385e-04
Loss = 4.4608e-01, PNorm = 48.2365, GNorm = 1.5375, lr_0 = 7.6332e-04
Loss = 4.8083e-01, PNorm = 48.2631, GNorm = 1.1975, lr_0 = 7.6280e-04
Loss = 4.8662e-01, PNorm = 48.2805, GNorm = 1.2745, lr_0 = 7.6228e-04
Loss = 4.7538e-01, PNorm = 48.3053, GNorm = 1.4292, lr_0 = 7.6176e-04
Loss = 5.3347e-01, PNorm = 48.3276, GNorm = 1.2211, lr_0 = 7.6123e-04
Loss = 4.7628e-01, PNorm = 48.3482, GNorm = 1.2008, lr_0 = 7.6071e-04
Loss = 5.0094e-01, PNorm = 48.3548, GNorm = 1.6870, lr_0 = 7.6019e-04
Loss = 5.4963e-01, PNorm = 48.3592, GNorm = 1.1676, lr_0 = 7.5967e-04
Loss = 4.6955e-01, PNorm = 48.3782, GNorm = 4.9844, lr_0 = 7.5915e-04
Loss = 5.2133e-01, PNorm = 48.4020, GNorm = 1.6537, lr_0 = 7.5863e-04
Loss = 4.7135e-01, PNorm = 48.4189, GNorm = 1.1968, lr_0 = 7.5811e-04
Loss = 4.7734e-01, PNorm = 48.4334, GNorm = 1.1870, lr_0 = 7.5759e-04
Loss = 4.5956e-01, PNorm = 48.4528, GNorm = 1.5458, lr_0 = 7.5707e-04
Loss = 4.4003e-01, PNorm = 48.4674, GNorm = 1.5522, lr_0 = 7.5655e-04
Loss = 4.8971e-01, PNorm = 48.4772, GNorm = 1.3736, lr_0 = 7.5603e-04
Loss = 4.6362e-01, PNorm = 48.4874, GNorm = 1.5994, lr_0 = 7.5552e-04
Loss = 5.1137e-01, PNorm = 48.5049, GNorm = 2.0382, lr_0 = 7.5500e-04
Loss = 4.5341e-01, PNorm = 48.5296, GNorm = 2.6433, lr_0 = 7.5448e-04
Loss = 4.5358e-01, PNorm = 48.5439, GNorm = 1.3614, lr_0 = 7.5397e-04
Loss = 4.9940e-01, PNorm = 48.5614, GNorm = 2.4821, lr_0 = 7.5345e-04
Loss = 5.0770e-01, PNorm = 48.5747, GNorm = 0.9752, lr_0 = 7.5293e-04
Loss = 4.7924e-01, PNorm = 48.5948, GNorm = 1.0353, lr_0 = 7.5242e-04
Loss = 4.8295e-01, PNorm = 48.6044, GNorm = 1.2474, lr_0 = 7.5190e-04
Loss = 4.8697e-01, PNorm = 48.6150, GNorm = 1.2664, lr_0 = 7.5139e-04
Loss = 5.2280e-01, PNorm = 48.6354, GNorm = 2.3487, lr_0 = 7.5087e-04
Loss = 4.4613e-01, PNorm = 48.6581, GNorm = 2.3625, lr_0 = 7.5036e-04
Loss = 4.0533e-01, PNorm = 48.6723, GNorm = 1.7613, lr_0 = 7.4984e-04
Loss = 4.3374e-01, PNorm = 48.6861, GNorm = 2.1232, lr_0 = 7.4933e-04
Loss = 4.4167e-01, PNorm = 48.7023, GNorm = 0.9524, lr_0 = 7.4882e-04
Loss = 4.6348e-01, PNorm = 48.7180, GNorm = 2.3191, lr_0 = 7.4830e-04
Loss = 4.2226e-01, PNorm = 48.7292, GNorm = 1.0205, lr_0 = 7.4779e-04
Loss = 5.0505e-01, PNorm = 48.7488, GNorm = 3.3440, lr_0 = 7.4728e-04
Loss = 4.3371e-01, PNorm = 48.7662, GNorm = 0.7390, lr_0 = 7.4677e-04
Loss = 5.0957e-01, PNorm = 48.7803, GNorm = 1.0545, lr_0 = 7.4625e-04
Loss = 4.2627e-01, PNorm = 48.8051, GNorm = 0.9612, lr_0 = 7.4574e-04
Loss = 4.2819e-01, PNorm = 48.8205, GNorm = 1.1245, lr_0 = 7.4523e-04
Loss = 4.6555e-01, PNorm = 48.8383, GNorm = 2.1007, lr_0 = 7.4472e-04
Loss = 4.5295e-01, PNorm = 48.8520, GNorm = 1.3961, lr_0 = 7.4421e-04
Loss = 5.0124e-01, PNorm = 48.8636, GNorm = 1.2879, lr_0 = 7.4370e-04
Loss = 4.1405e-01, PNorm = 48.8883, GNorm = 1.2491, lr_0 = 7.4319e-04
Loss = 4.1019e-01, PNorm = 48.9063, GNorm = 1.6021, lr_0 = 7.4268e-04
Loss = 5.2084e-01, PNorm = 48.9202, GNorm = 1.8463, lr_0 = 7.4217e-04
Loss = 5.3066e-01, PNorm = 48.9416, GNorm = 1.0374, lr_0 = 7.4167e-04
Loss = 4.2997e-01, PNorm = 48.9689, GNorm = 0.8316, lr_0 = 7.4116e-04
Loss = 4.8675e-01, PNorm = 48.9842, GNorm = 1.1515, lr_0 = 7.4065e-04
Loss = 6.1893e-01, PNorm = 49.0004, GNorm = 1.2578, lr_0 = 7.4014e-04
Loss = 4.5587e-01, PNorm = 49.0203, GNorm = 1.1770, lr_0 = 7.3964e-04
Loss = 4.4457e-01, PNorm = 49.0360, GNorm = 1.1555, lr_0 = 7.3913e-04
Loss = 4.1553e-01, PNorm = 49.0470, GNorm = 0.9171, lr_0 = 7.3862e-04
Loss = 4.5965e-01, PNorm = 49.0620, GNorm = 1.8476, lr_0 = 7.3812e-04
Loss = 4.2975e-01, PNorm = 49.0823, GNorm = 1.5871, lr_0 = 7.3761e-04
Loss = 4.3912e-01, PNorm = 49.0941, GNorm = 1.1753, lr_0 = 7.3711e-04
Loss = 5.2422e-01, PNorm = 49.1061, GNorm = 2.1953, lr_0 = 7.3660e-04
Loss = 4.3825e-01, PNorm = 49.1190, GNorm = 1.4597, lr_0 = 7.3610e-04
Loss = 4.7215e-01, PNorm = 49.1301, GNorm = 1.3150, lr_0 = 7.3559e-04
Loss = 4.2801e-01, PNorm = 49.1475, GNorm = 0.8994, lr_0 = 7.3509e-04
Loss = 5.4074e-01, PNorm = 49.1599, GNorm = 2.1052, lr_0 = 7.3458e-04
Loss = 4.4065e-01, PNorm = 49.1812, GNorm = 1.1361, lr_0 = 7.3408e-04
Loss = 4.0273e-01, PNorm = 49.1989, GNorm = 1.1113, lr_0 = 7.3358e-04
Loss = 4.4486e-01, PNorm = 49.2134, GNorm = 2.5720, lr_0 = 7.3308e-04
Loss = 4.8513e-01, PNorm = 49.2272, GNorm = 1.4001, lr_0 = 7.3257e-04
Loss = 4.4790e-01, PNorm = 49.2375, GNorm = 1.1228, lr_0 = 7.3207e-04
Loss = 4.6738e-01, PNorm = 49.2505, GNorm = 0.9770, lr_0 = 7.3157e-04
Loss = 3.9874e-01, PNorm = 49.2618, GNorm = 1.4242, lr_0 = 7.3107e-04
Loss = 4.5682e-01, PNorm = 49.2739, GNorm = 1.6987, lr_0 = 7.3057e-04
Loss = 5.1479e-01, PNorm = 49.2798, GNorm = 1.3382, lr_0 = 7.3007e-04
Loss = 4.2098e-01, PNorm = 49.2991, GNorm = 1.5998, lr_0 = 7.2957e-04
Loss = 5.1883e-01, PNorm = 49.3171, GNorm = 1.9495, lr_0 = 7.2907e-04
Loss = 4.7556e-01, PNorm = 49.3239, GNorm = 1.9316, lr_0 = 7.2857e-04
Loss = 4.8277e-01, PNorm = 49.3363, GNorm = 1.0991, lr_0 = 7.2807e-04
Loss = 4.5488e-01, PNorm = 49.3565, GNorm = 1.0126, lr_0 = 7.2757e-04
Loss = 4.7272e-01, PNorm = 49.3667, GNorm = 2.0792, lr_0 = 7.2707e-04
Loss = 4.5951e-01, PNorm = 49.3739, GNorm = 1.0542, lr_0 = 7.2657e-04
Loss = 4.0566e-01, PNorm = 49.3873, GNorm = 2.6375, lr_0 = 7.2608e-04
Loss = 4.5130e-01, PNorm = 49.4020, GNorm = 1.2941, lr_0 = 7.2558e-04
Loss = 5.1060e-01, PNorm = 49.4184, GNorm = 1.3362, lr_0 = 7.2508e-04
Loss = 3.9714e-01, PNorm = 49.4394, GNorm = 1.1846, lr_0 = 7.2458e-04
Loss = 4.4778e-01, PNorm = 49.4563, GNorm = 1.3328, lr_0 = 7.2409e-04
Loss = 5.0083e-01, PNorm = 49.4739, GNorm = 1.2679, lr_0 = 7.2359e-04
Loss = 4.3226e-01, PNorm = 49.4941, GNorm = 1.1345, lr_0 = 7.2310e-04
Loss = 5.0635e-01, PNorm = 49.5009, GNorm = 1.8693, lr_0 = 7.2260e-04
Loss = 4.2477e-01, PNorm = 49.5165, GNorm = 1.1051, lr_0 = 7.2211e-04
Loss = 4.9542e-01, PNorm = 49.5361, GNorm = 1.8959, lr_0 = 7.2161e-04
Loss = 4.9752e-01, PNorm = 49.5561, GNorm = 1.2169, lr_0 = 7.2112e-04
Loss = 4.8307e-01, PNorm = 49.5755, GNorm = 1.9457, lr_0 = 7.2062e-04
Loss = 4.7983e-01, PNorm = 49.5956, GNorm = 2.6295, lr_0 = 7.2013e-04
Loss = 4.6909e-01, PNorm = 49.6129, GNorm = 0.9533, lr_0 = 7.1964e-04
Validation mae = 0.118983
Epoch 6
Loss = 4.9561e-01, PNorm = 49.6318, GNorm = 0.9698, lr_0 = 7.1914e-04
Loss = 5.1728e-01, PNorm = 49.6527, GNorm = 1.4626, lr_0 = 7.1865e-04
Loss = 5.1304e-01, PNorm = 49.6695, GNorm = 1.6535, lr_0 = 7.1816e-04
Loss = 4.6619e-01, PNorm = 49.6848, GNorm = 1.0256, lr_0 = 7.1767e-04
Loss = 4.2854e-01, PNorm = 49.7037, GNorm = 1.3735, lr_0 = 7.1717e-04
Loss = 5.3060e-01, PNorm = 49.7181, GNorm = 0.9640, lr_0 = 7.1668e-04
Loss = 5.2506e-01, PNorm = 49.7319, GNorm = 1.1924, lr_0 = 7.1619e-04
Loss = 5.2978e-01, PNorm = 49.7515, GNorm = 3.0068, lr_0 = 7.1570e-04
Loss = 4.4522e-01, PNorm = 49.7663, GNorm = 1.7472, lr_0 = 7.1521e-04
Loss = 4.4971e-01, PNorm = 49.7871, GNorm = 1.4541, lr_0 = 7.1472e-04
Loss = 4.1113e-01, PNorm = 49.8044, GNorm = 0.7666, lr_0 = 7.1423e-04
Loss = 4.6828e-01, PNorm = 49.8158, GNorm = 1.6574, lr_0 = 7.1374e-04
Loss = 3.8900e-01, PNorm = 49.8348, GNorm = 1.4422, lr_0 = 7.1325e-04
Loss = 4.3116e-01, PNorm = 49.8544, GNorm = 2.0181, lr_0 = 7.1277e-04
Loss = 4.6841e-01, PNorm = 49.8755, GNorm = 1.5157, lr_0 = 7.1228e-04
Loss = 4.3052e-01, PNorm = 49.8896, GNorm = 1.1928, lr_0 = 7.1179e-04
Loss = 5.1106e-01, PNorm = 49.9072, GNorm = 1.6795, lr_0 = 7.1130e-04
Loss = 4.5195e-01, PNorm = 49.9251, GNorm = 2.1171, lr_0 = 7.1081e-04
Loss = 5.3467e-01, PNorm = 49.9345, GNorm = 1.7468, lr_0 = 7.1033e-04
Loss = 5.1633e-01, PNorm = 49.9561, GNorm = 2.0820, lr_0 = 7.0984e-04
Loss = 4.7996e-01, PNorm = 49.9803, GNorm = 1.5642, lr_0 = 7.0935e-04
Loss = 4.0100e-01, PNorm = 49.9988, GNorm = 1.3381, lr_0 = 7.0887e-04
Loss = 5.4793e-01, PNorm = 50.0163, GNorm = 1.3710, lr_0 = 7.0838e-04
Loss = 4.1376e-01, PNorm = 50.0444, GNorm = 1.3718, lr_0 = 7.0790e-04
Loss = 4.4788e-01, PNorm = 50.0584, GNorm = 1.3241, lr_0 = 7.0741e-04
Loss = 4.8630e-01, PNorm = 50.0759, GNorm = 1.5169, lr_0 = 7.0693e-04
Loss = 4.5454e-01, PNorm = 50.0961, GNorm = 0.9927, lr_0 = 7.0644e-04
Loss = 4.4434e-01, PNorm = 50.1117, GNorm = 1.4608, lr_0 = 7.0596e-04
Loss = 4.8128e-01, PNorm = 50.1269, GNorm = 1.2318, lr_0 = 7.0548e-04
Loss = 4.1846e-01, PNorm = 50.1395, GNorm = 1.2506, lr_0 = 7.0499e-04
Loss = 5.0263e-01, PNorm = 50.1561, GNorm = 1.7084, lr_0 = 7.0451e-04
Loss = 4.5262e-01, PNorm = 50.1671, GNorm = 1.4301, lr_0 = 7.0403e-04
Loss = 4.0110e-01, PNorm = 50.1816, GNorm = 1.4873, lr_0 = 7.0354e-04
Loss = 4.7648e-01, PNorm = 50.1952, GNorm = 1.8033, lr_0 = 7.0306e-04
Loss = 4.9542e-01, PNorm = 50.2102, GNorm = 0.9087, lr_0 = 7.0258e-04
Loss = 3.8790e-01, PNorm = 50.2293, GNorm = 0.8163, lr_0 = 7.0210e-04
Loss = 4.3804e-01, PNorm = 50.2468, GNorm = 1.7308, lr_0 = 7.0162e-04
Loss = 4.5613e-01, PNorm = 50.2719, GNorm = 2.9575, lr_0 = 7.0114e-04
Loss = 5.5495e-01, PNorm = 50.2873, GNorm = 1.4127, lr_0 = 7.0066e-04
Loss = 4.6516e-01, PNorm = 50.3131, GNorm = 1.2511, lr_0 = 7.0018e-04
Loss = 4.9199e-01, PNorm = 50.3268, GNorm = 1.5213, lr_0 = 6.9970e-04
Loss = 4.6319e-01, PNorm = 50.3372, GNorm = 1.1199, lr_0 = 6.9922e-04
Loss = 4.5535e-01, PNorm = 50.3539, GNorm = 1.0785, lr_0 = 6.9874e-04
Loss = 4.3584e-01, PNorm = 50.3649, GNorm = 0.9962, lr_0 = 6.9826e-04
Loss = 4.5673e-01, PNorm = 50.3816, GNorm = 2.1072, lr_0 = 6.9778e-04
Loss = 4.6111e-01, PNorm = 50.4067, GNorm = 1.5056, lr_0 = 6.9730e-04
Loss = 4.7221e-01, PNorm = 50.4284, GNorm = 1.4838, lr_0 = 6.9683e-04
Loss = 4.4522e-01, PNorm = 50.4437, GNorm = 1.2510, lr_0 = 6.9635e-04
Loss = 4.6637e-01, PNorm = 50.4596, GNorm = 1.6281, lr_0 = 6.9587e-04
Loss = 4.5555e-01, PNorm = 50.4731, GNorm = 1.4811, lr_0 = 6.9540e-04
Loss = 4.5672e-01, PNorm = 50.4846, GNorm = 1.4753, lr_0 = 6.9492e-04
Loss = 4.0630e-01, PNorm = 50.4981, GNorm = 1.4927, lr_0 = 6.9444e-04
Loss = 4.1339e-01, PNorm = 50.5080, GNorm = 0.7995, lr_0 = 6.9397e-04
Loss = 4.3606e-01, PNorm = 50.5263, GNorm = 1.5859, lr_0 = 6.9349e-04
Loss = 4.3254e-01, PNorm = 50.5457, GNorm = 1.5275, lr_0 = 6.9302e-04
Loss = 4.5048e-01, PNorm = 50.5643, GNorm = 1.5984, lr_0 = 6.9254e-04
Loss = 4.4222e-01, PNorm = 50.5768, GNorm = 1.0856, lr_0 = 6.9207e-04
Loss = 4.4904e-01, PNorm = 50.5942, GNorm = 1.7242, lr_0 = 6.9159e-04
Loss = 4.3372e-01, PNorm = 50.6143, GNorm = 1.1934, lr_0 = 6.9112e-04
Loss = 3.8586e-01, PNorm = 50.6190, GNorm = 1.6527, lr_0 = 6.9065e-04
Loss = 4.4149e-01, PNorm = 50.6271, GNorm = 1.3932, lr_0 = 6.9017e-04
Loss = 4.4214e-01, PNorm = 50.6460, GNorm = 1.1324, lr_0 = 6.8970e-04
Loss = 5.1764e-01, PNorm = 50.6687, GNorm = 1.7312, lr_0 = 6.8923e-04
Loss = 4.5753e-01, PNorm = 50.6818, GNorm = 1.2929, lr_0 = 6.8876e-04
Loss = 4.7156e-01, PNorm = 50.6978, GNorm = 2.8368, lr_0 = 6.8828e-04
Loss = 4.5313e-01, PNorm = 50.7082, GNorm = 1.3520, lr_0 = 6.8781e-04
Loss = 4.4929e-01, PNorm = 50.7324, GNorm = 1.7455, lr_0 = 6.8734e-04
Loss = 3.8095e-01, PNorm = 50.7498, GNorm = 0.8654, lr_0 = 6.8687e-04
Loss = 4.6862e-01, PNorm = 50.7611, GNorm = 1.1764, lr_0 = 6.8640e-04
Loss = 4.2926e-01, PNorm = 50.7750, GNorm = 0.9337, lr_0 = 6.8593e-04
Loss = 4.7517e-01, PNorm = 50.7926, GNorm = 1.5086, lr_0 = 6.8546e-04
Loss = 3.8949e-01, PNorm = 50.8112, GNorm = 1.3269, lr_0 = 6.8499e-04
Loss = 4.4262e-01, PNorm = 50.8277, GNorm = 1.2398, lr_0 = 6.8452e-04
Loss = 4.9581e-01, PNorm = 50.8367, GNorm = 1.3181, lr_0 = 6.8405e-04
Loss = 4.4386e-01, PNorm = 50.8486, GNorm = 1.0703, lr_0 = 6.8358e-04
Loss = 4.0464e-01, PNorm = 50.8643, GNorm = 1.7115, lr_0 = 6.8312e-04
Loss = 4.8129e-01, PNorm = 50.8773, GNorm = 0.9636, lr_0 = 6.8265e-04
Loss = 4.1640e-01, PNorm = 50.8912, GNorm = 1.2747, lr_0 = 6.8218e-04
Loss = 4.8485e-01, PNorm = 50.9040, GNorm = 3.1729, lr_0 = 6.8171e-04
Loss = 4.2549e-01, PNorm = 50.9162, GNorm = 1.3848, lr_0 = 6.8125e-04
Loss = 4.3239e-01, PNorm = 50.9353, GNorm = 1.1346, lr_0 = 6.8078e-04
Loss = 4.3293e-01, PNorm = 50.9493, GNorm = 1.7640, lr_0 = 6.8031e-04
Loss = 4.3801e-01, PNorm = 50.9552, GNorm = 1.8385, lr_0 = 6.7985e-04
Loss = 4.0435e-01, PNorm = 50.9613, GNorm = 1.3574, lr_0 = 6.7938e-04
Loss = 4.2277e-01, PNorm = 50.9707, GNorm = 1.2402, lr_0 = 6.7892e-04
Loss = 4.2630e-01, PNorm = 50.9828, GNorm = 1.2050, lr_0 = 6.7845e-04
Loss = 4.3277e-01, PNorm = 50.9916, GNorm = 1.0471, lr_0 = 6.7799e-04
Loss = 4.8753e-01, PNorm = 51.0050, GNorm = 1.0960, lr_0 = 6.7752e-04
Loss = 4.9652e-01, PNorm = 51.0215, GNorm = 1.3811, lr_0 = 6.7706e-04
Loss = 4.1866e-01, PNorm = 51.0336, GNorm = 2.4015, lr_0 = 6.7659e-04
Loss = 4.2087e-01, PNorm = 51.0379, GNorm = 1.1057, lr_0 = 6.7613e-04
Loss = 4.0913e-01, PNorm = 51.0501, GNorm = 2.1995, lr_0 = 6.7567e-04
Loss = 4.6907e-01, PNorm = 51.0597, GNorm = 1.1931, lr_0 = 6.7520e-04
Loss = 4.5241e-01, PNorm = 51.0778, GNorm = 1.0955, lr_0 = 6.7474e-04
Loss = 4.3922e-01, PNorm = 51.0904, GNorm = 1.8154, lr_0 = 6.7428e-04
Loss = 5.0470e-01, PNorm = 51.1062, GNorm = 1.8838, lr_0 = 6.7382e-04
Loss = 4.7881e-01, PNorm = 51.1241, GNorm = 1.1968, lr_0 = 6.7335e-04
Loss = 3.8291e-01, PNorm = 51.1419, GNorm = 1.2489, lr_0 = 6.7289e-04
Loss = 3.6154e-01, PNorm = 51.1517, GNorm = 1.1605, lr_0 = 6.7243e-04
Loss = 4.6833e-01, PNorm = 51.1585, GNorm = 1.1328, lr_0 = 6.7197e-04
Loss = 4.7742e-01, PNorm = 51.1740, GNorm = 2.2174, lr_0 = 6.7151e-04
Loss = 5.0271e-01, PNorm = 51.1852, GNorm = 1.2167, lr_0 = 6.7105e-04
Loss = 4.4060e-01, PNorm = 51.1981, GNorm = 2.5678, lr_0 = 6.7059e-04
Loss = 4.2137e-01, PNorm = 51.2177, GNorm = 0.9561, lr_0 = 6.7013e-04
Loss = 4.9209e-01, PNorm = 51.2308, GNorm = 1.0666, lr_0 = 6.6967e-04
Loss = 4.7027e-01, PNorm = 51.2407, GNorm = 1.3369, lr_0 = 6.6921e-04
Loss = 4.8827e-01, PNorm = 51.2571, GNorm = 1.1911, lr_0 = 6.6876e-04
Loss = 5.5522e-01, PNorm = 51.2736, GNorm = 1.3819, lr_0 = 6.6830e-04
Loss = 4.5678e-01, PNorm = 51.3024, GNorm = 1.3596, lr_0 = 6.6784e-04
Loss = 4.5310e-01, PNorm = 51.3237, GNorm = 2.1077, lr_0 = 6.6738e-04
Loss = 4.7825e-01, PNorm = 51.3375, GNorm = 1.0680, lr_0 = 6.6693e-04
Loss = 5.1037e-01, PNorm = 51.3536, GNorm = 1.1370, lr_0 = 6.6647e-04
Loss = 5.4446e-01, PNorm = 51.3661, GNorm = 1.3395, lr_0 = 6.6601e-04
Loss = 4.8542e-01, PNorm = 51.3774, GNorm = 0.9941, lr_0 = 6.6556e-04
Loss = 4.6519e-01, PNorm = 51.3989, GNorm = 1.2978, lr_0 = 6.6510e-04
Loss = 4.4588e-01, PNorm = 51.4150, GNorm = 2.1499, lr_0 = 6.6464e-04
Loss = 5.0789e-01, PNorm = 51.4258, GNorm = 1.7713, lr_0 = 6.6419e-04
Loss = 4.5356e-01, PNorm = 51.4408, GNorm = 1.7865, lr_0 = 6.6373e-04
Loss = 4.6687e-01, PNorm = 51.4492, GNorm = 1.4525, lr_0 = 6.6328e-04
Loss = 4.4506e-01, PNorm = 51.4609, GNorm = 0.8116, lr_0 = 6.6282e-04
Validation mae = 0.118684
Epoch 7
Loss = 5.7175e-01, PNorm = 51.4690, GNorm = 1.6614, lr_0 = 6.6237e-04
Loss = 4.7438e-01, PNorm = 51.4832, GNorm = 1.1146, lr_0 = 6.6192e-04
Loss = 4.8533e-01, PNorm = 51.5016, GNorm = 1.1664, lr_0 = 6.6146e-04
Loss = 4.3879e-01, PNorm = 51.5156, GNorm = 1.1784, lr_0 = 6.6101e-04
Loss = 3.9993e-01, PNorm = 51.5330, GNorm = 1.0149, lr_0 = 6.6056e-04
Loss = 3.5572e-01, PNorm = 51.5460, GNorm = 1.7612, lr_0 = 6.6011e-04
Loss = 4.3538e-01, PNorm = 51.5486, GNorm = 1.3593, lr_0 = 6.5965e-04
Loss = 5.0272e-01, PNorm = 51.5691, GNorm = 1.4204, lr_0 = 6.5920e-04
Loss = 4.7508e-01, PNorm = 51.5877, GNorm = 1.5112, lr_0 = 6.5875e-04
Loss = 4.2307e-01, PNorm = 51.6040, GNorm = 1.1717, lr_0 = 6.5830e-04
Loss = 4.4292e-01, PNorm = 51.6120, GNorm = 1.7791, lr_0 = 6.5785e-04
Loss = 5.0078e-01, PNorm = 51.6191, GNorm = 1.4415, lr_0 = 6.5740e-04
Loss = 3.7380e-01, PNorm = 51.6289, GNorm = 1.3863, lr_0 = 6.5695e-04
Loss = 4.2886e-01, PNorm = 51.6461, GNorm = 1.9161, lr_0 = 6.5650e-04
Loss = 4.9067e-01, PNorm = 51.6559, GNorm = 1.0525, lr_0 = 6.5605e-04
Loss = 4.3369e-01, PNorm = 51.6693, GNorm = 1.4135, lr_0 = 6.5560e-04
Loss = 3.9802e-01, PNorm = 51.6825, GNorm = 1.3829, lr_0 = 6.5515e-04
Loss = 4.4513e-01, PNorm = 51.6951, GNorm = 0.8986, lr_0 = 6.5470e-04
Loss = 4.3009e-01, PNorm = 51.7170, GNorm = 1.2561, lr_0 = 6.5425e-04
Loss = 4.7366e-01, PNorm = 51.7371, GNorm = 1.2347, lr_0 = 6.5380e-04
Loss = 4.8279e-01, PNorm = 51.7476, GNorm = 2.4059, lr_0 = 6.5335e-04
Loss = 4.9169e-01, PNorm = 51.7622, GNorm = 0.9445, lr_0 = 6.5291e-04
Loss = 4.9876e-01, PNorm = 51.7851, GNorm = 1.2117, lr_0 = 6.5246e-04
Loss = 4.2503e-01, PNorm = 51.8087, GNorm = 1.2562, lr_0 = 6.5201e-04
Loss = 4.3627e-01, PNorm = 51.8239, GNorm = 0.9417, lr_0 = 6.5157e-04
Loss = 4.3087e-01, PNorm = 51.8344, GNorm = 2.6442, lr_0 = 6.5112e-04
Loss = 4.1106e-01, PNorm = 51.8492, GNorm = 1.1306, lr_0 = 6.5067e-04
Loss = 4.5416e-01, PNorm = 51.8613, GNorm = 1.3959, lr_0 = 6.5023e-04
Loss = 4.2190e-01, PNorm = 51.8756, GNorm = 1.0626, lr_0 = 6.4978e-04
Loss = 3.8149e-01, PNorm = 51.8817, GNorm = 1.1155, lr_0 = 6.4934e-04
Loss = 4.3677e-01, PNorm = 51.8902, GNorm = 1.3339, lr_0 = 6.4889e-04
Loss = 4.1911e-01, PNorm = 51.9053, GNorm = 1.6747, lr_0 = 6.4845e-04
Loss = 4.4162e-01, PNorm = 51.9218, GNorm = 1.6391, lr_0 = 6.4800e-04
Loss = 4.3521e-01, PNorm = 51.9367, GNorm = 1.5460, lr_0 = 6.4756e-04
Loss = 4.6759e-01, PNorm = 51.9475, GNorm = 2.3369, lr_0 = 6.4712e-04
Loss = 4.8136e-01, PNorm = 51.9607, GNorm = 1.2773, lr_0 = 6.4667e-04
Loss = 3.9346e-01, PNorm = 51.9743, GNorm = 1.6388, lr_0 = 6.4623e-04
Loss = 4.1455e-01, PNorm = 51.9919, GNorm = 0.8244, lr_0 = 6.4579e-04
Loss = 4.2365e-01, PNorm = 52.0025, GNorm = 1.8735, lr_0 = 6.4534e-04
Loss = 4.5076e-01, PNorm = 52.0239, GNorm = 1.4472, lr_0 = 6.4490e-04
Loss = 4.7683e-01, PNorm = 52.0347, GNorm = 1.1472, lr_0 = 6.4446e-04
Loss = 4.9203e-01, PNorm = 52.0521, GNorm = 2.2813, lr_0 = 6.4402e-04
Loss = 4.8791e-01, PNorm = 52.0605, GNorm = 1.6350, lr_0 = 6.4358e-04
Loss = 5.3305e-01, PNorm = 52.0749, GNorm = 1.0333, lr_0 = 6.4314e-04
Loss = 4.3653e-01, PNorm = 52.0941, GNorm = 3.2112, lr_0 = 6.4270e-04
Loss = 4.6406e-01, PNorm = 52.1109, GNorm = 2.1761, lr_0 = 6.4226e-04
Loss = 4.3250e-01, PNorm = 52.1281, GNorm = 2.4006, lr_0 = 6.4182e-04
Loss = 4.2494e-01, PNorm = 52.1415, GNorm = 1.4416, lr_0 = 6.4138e-04
Loss = 4.0851e-01, PNorm = 52.1562, GNorm = 1.3357, lr_0 = 6.4094e-04
Loss = 3.9812e-01, PNorm = 52.1695, GNorm = 1.3946, lr_0 = 6.4050e-04
Loss = 4.1334e-01, PNorm = 52.1851, GNorm = 0.9602, lr_0 = 6.4006e-04
Loss = 4.2503e-01, PNorm = 52.1982, GNorm = 1.8945, lr_0 = 6.3962e-04
Loss = 4.4214e-01, PNorm = 52.2064, GNorm = 1.3071, lr_0 = 6.3918e-04
Loss = 4.1602e-01, PNorm = 52.2166, GNorm = 1.2247, lr_0 = 6.3874e-04
Loss = 4.0575e-01, PNorm = 52.2285, GNorm = 0.8832, lr_0 = 6.3831e-04
Loss = 4.6737e-01, PNorm = 52.2359, GNorm = 1.5014, lr_0 = 6.3787e-04
Loss = 3.8678e-01, PNorm = 52.2498, GNorm = 0.8526, lr_0 = 6.3743e-04
Loss = 4.0911e-01, PNorm = 52.2612, GNorm = 1.9100, lr_0 = 6.3700e-04
Loss = 3.9456e-01, PNorm = 52.2705, GNorm = 2.1705, lr_0 = 6.3656e-04
Loss = 4.3083e-01, PNorm = 52.2827, GNorm = 1.2363, lr_0 = 6.3612e-04
Loss = 4.1193e-01, PNorm = 52.2970, GNorm = 0.9218, lr_0 = 6.3569e-04
Loss = 4.7251e-01, PNorm = 52.3103, GNorm = 1.9678, lr_0 = 6.3525e-04
Loss = 4.4431e-01, PNorm = 52.3206, GNorm = 1.5438, lr_0 = 6.3482e-04
Loss = 3.8027e-01, PNorm = 52.3305, GNorm = 1.2619, lr_0 = 6.3438e-04
Loss = 4.1264e-01, PNorm = 52.3446, GNorm = 1.2714, lr_0 = 6.3395e-04
Loss = 4.1524e-01, PNorm = 52.3586, GNorm = 1.5348, lr_0 = 6.3351e-04
Loss = 4.2661e-01, PNorm = 52.3708, GNorm = 1.3348, lr_0 = 6.3308e-04
Loss = 4.6560e-01, PNorm = 52.3835, GNorm = 1.1967, lr_0 = 6.3265e-04
Loss = 4.6873e-01, PNorm = 52.3988, GNorm = 1.1529, lr_0 = 6.3221e-04
Loss = 4.8931e-01, PNorm = 52.4191, GNorm = 2.7377, lr_0 = 6.3178e-04
Loss = 4.6075e-01, PNorm = 52.4328, GNorm = 1.1435, lr_0 = 6.3135e-04
Loss = 5.1632e-01, PNorm = 52.4415, GNorm = 2.5864, lr_0 = 6.3091e-04
Loss = 4.4513e-01, PNorm = 52.4517, GNorm = 1.2796, lr_0 = 6.3048e-04
Loss = 4.1807e-01, PNorm = 52.4637, GNorm = 1.4762, lr_0 = 6.3005e-04
Loss = 4.3785e-01, PNorm = 52.4773, GNorm = 2.6521, lr_0 = 6.2962e-04
Loss = 4.8659e-01, PNorm = 52.4972, GNorm = 2.2346, lr_0 = 6.2919e-04
Loss = 4.6258e-01, PNorm = 52.5143, GNorm = 2.2413, lr_0 = 6.2876e-04
Loss = 4.1202e-01, PNorm = 52.5297, GNorm = 2.0146, lr_0 = 6.2833e-04
Loss = 4.4512e-01, PNorm = 52.5436, GNorm = 2.7669, lr_0 = 6.2789e-04
Loss = 4.2566e-01, PNorm = 52.5590, GNorm = 1.3528, lr_0 = 6.2746e-04
Loss = 4.4833e-01, PNorm = 52.5677, GNorm = 1.2125, lr_0 = 6.2703e-04
Loss = 4.5747e-01, PNorm = 52.5788, GNorm = 1.4414, lr_0 = 6.2661e-04
Loss = 4.8422e-01, PNorm = 52.5943, GNorm = 2.6286, lr_0 = 6.2618e-04
Loss = 5.2759e-01, PNorm = 52.6031, GNorm = 2.0693, lr_0 = 6.2575e-04
Loss = 4.2912e-01, PNorm = 52.6160, GNorm = 1.0969, lr_0 = 6.2532e-04
Loss = 4.2650e-01, PNorm = 52.6291, GNorm = 1.5802, lr_0 = 6.2489e-04
Loss = 4.9430e-01, PNorm = 52.6406, GNorm = 1.8120, lr_0 = 6.2446e-04
Loss = 4.2787e-01, PNorm = 52.6572, GNorm = 1.1146, lr_0 = 6.2403e-04
Loss = 4.8712e-01, PNorm = 52.6734, GNorm = 1.6624, lr_0 = 6.2361e-04
Loss = 3.9750e-01, PNorm = 52.6877, GNorm = 1.3846, lr_0 = 6.2318e-04
Loss = 4.0528e-01, PNorm = 52.6989, GNorm = 0.8797, lr_0 = 6.2275e-04
Loss = 3.8011e-01, PNorm = 52.7126, GNorm = 1.6912, lr_0 = 6.2233e-04
Loss = 4.2820e-01, PNorm = 52.7263, GNorm = 1.7222, lr_0 = 6.2190e-04
Loss = 5.6584e-01, PNorm = 52.7385, GNorm = 2.2436, lr_0 = 6.2147e-04
Loss = 4.5766e-01, PNorm = 52.7608, GNorm = 1.4084, lr_0 = 6.2105e-04
Loss = 4.6406e-01, PNorm = 52.7774, GNorm = 1.1190, lr_0 = 6.2062e-04
Loss = 4.6593e-01, PNorm = 52.7971, GNorm = 1.9901, lr_0 = 6.2020e-04
Loss = 4.0836e-01, PNorm = 52.8086, GNorm = 1.3296, lr_0 = 6.1977e-04
Loss = 4.6674e-01, PNorm = 52.8241, GNorm = 0.9550, lr_0 = 6.1935e-04
Loss = 4.4512e-01, PNorm = 52.8345, GNorm = 1.1738, lr_0 = 6.1892e-04
Loss = 4.3963e-01, PNorm = 52.8501, GNorm = 1.8996, lr_0 = 6.1850e-04
Loss = 4.4073e-01, PNorm = 52.8614, GNorm = 1.2177, lr_0 = 6.1808e-04
Loss = 4.6358e-01, PNorm = 52.8699, GNorm = 1.0567, lr_0 = 6.1765e-04
Loss = 4.1243e-01, PNorm = 52.8831, GNorm = 2.0895, lr_0 = 6.1723e-04
Loss = 4.3333e-01, PNorm = 52.8938, GNorm = 2.3276, lr_0 = 6.1681e-04
Loss = 4.0437e-01, PNorm = 52.9079, GNorm = 1.4154, lr_0 = 6.1638e-04
Loss = 4.6067e-01, PNorm = 52.9241, GNorm = 2.2785, lr_0 = 6.1596e-04
Loss = 4.8607e-01, PNorm = 52.9364, GNorm = 1.2549, lr_0 = 6.1554e-04
Loss = 5.4852e-01, PNorm = 52.9438, GNorm = 2.2534, lr_0 = 6.1512e-04
Loss = 4.5713e-01, PNorm = 52.9530, GNorm = 1.2806, lr_0 = 6.1470e-04
Loss = 5.0058e-01, PNorm = 52.9619, GNorm = 1.5383, lr_0 = 6.1428e-04
Loss = 4.3545e-01, PNorm = 52.9787, GNorm = 1.2000, lr_0 = 6.1385e-04
Loss = 4.7617e-01, PNorm = 52.9867, GNorm = 1.7327, lr_0 = 6.1343e-04
Loss = 3.9576e-01, PNorm = 53.0026, GNorm = 1.2610, lr_0 = 6.1301e-04
Loss = 4.1360e-01, PNorm = 53.0177, GNorm = 0.8453, lr_0 = 6.1259e-04
Loss = 3.8148e-01, PNorm = 53.0237, GNorm = 1.2379, lr_0 = 6.1217e-04
Loss = 4.3189e-01, PNorm = 53.0349, GNorm = 1.5803, lr_0 = 6.1175e-04
Loss = 4.3864e-01, PNorm = 53.0459, GNorm = 2.3412, lr_0 = 6.1134e-04
Loss = 4.3163e-01, PNorm = 53.0574, GNorm = 0.9218, lr_0 = 6.1092e-04
Loss = 4.8286e-01, PNorm = 53.0677, GNorm = 1.8387, lr_0 = 6.1050e-04
Validation mae = 0.117908
Epoch 8
Loss = 4.3745e-01, PNorm = 53.0760, GNorm = 1.4448, lr_0 = 6.1008e-04
Loss = 3.8372e-01, PNorm = 53.0907, GNorm = 1.2778, lr_0 = 6.0966e-04
Loss = 4.8764e-01, PNorm = 53.1040, GNorm = 1.1077, lr_0 = 6.0924e-04
Loss = 4.0942e-01, PNorm = 53.1237, GNorm = 1.6645, lr_0 = 6.0883e-04
Loss = 4.0821e-01, PNorm = 53.1456, GNorm = 1.4386, lr_0 = 6.0841e-04
Loss = 4.5324e-01, PNorm = 53.1621, GNorm = 1.4851, lr_0 = 6.0799e-04
Loss = 4.8509e-01, PNorm = 53.1745, GNorm = 1.2900, lr_0 = 6.0758e-04
Loss = 4.6236e-01, PNorm = 53.1938, GNorm = 1.4935, lr_0 = 6.0716e-04
Loss = 4.1766e-01, PNorm = 53.2102, GNorm = 1.5141, lr_0 = 6.0674e-04
Loss = 4.5574e-01, PNorm = 53.2257, GNorm = 1.4915, lr_0 = 6.0633e-04
Loss = 4.5916e-01, PNorm = 53.2329, GNorm = 1.5484, lr_0 = 6.0591e-04
Loss = 5.0591e-01, PNorm = 53.2564, GNorm = 1.7934, lr_0 = 6.0550e-04
Loss = 4.6142e-01, PNorm = 53.2762, GNorm = 1.7860, lr_0 = 6.0508e-04
Loss = 4.0309e-01, PNorm = 53.2932, GNorm = 1.4758, lr_0 = 6.0467e-04
Loss = 4.1255e-01, PNorm = 53.3016, GNorm = 1.3780, lr_0 = 6.0425e-04
Loss = 4.8854e-01, PNorm = 53.3148, GNorm = 1.6423, lr_0 = 6.0384e-04
Loss = 4.2880e-01, PNorm = 53.3227, GNorm = 1.2074, lr_0 = 6.0343e-04
Loss = 4.4503e-01, PNorm = 53.3355, GNorm = 0.9839, lr_0 = 6.0301e-04
Loss = 4.1415e-01, PNorm = 53.3471, GNorm = 1.1077, lr_0 = 6.0260e-04
Loss = 4.0365e-01, PNorm = 53.3627, GNorm = 1.2127, lr_0 = 6.0219e-04
Loss = 4.1914e-01, PNorm = 53.3726, GNorm = 1.0233, lr_0 = 6.0178e-04
Loss = 4.6285e-01, PNorm = 53.3842, GNorm = 1.6950, lr_0 = 6.0136e-04
Loss = 4.3776e-01, PNorm = 53.3991, GNorm = 1.3237, lr_0 = 6.0095e-04
Loss = 3.8370e-01, PNorm = 53.4111, GNorm = 1.0978, lr_0 = 6.0054e-04
Loss = 4.2309e-01, PNorm = 53.4229, GNorm = 1.1543, lr_0 = 6.0013e-04
Loss = 4.9189e-01, PNorm = 53.4338, GNorm = 1.2507, lr_0 = 5.9972e-04
Loss = 4.2366e-01, PNorm = 53.4461, GNorm = 0.9930, lr_0 = 5.9931e-04
Loss = 4.4972e-01, PNorm = 53.4561, GNorm = 1.7720, lr_0 = 5.9890e-04
Loss = 4.3400e-01, PNorm = 53.4648, GNorm = 2.8981, lr_0 = 5.9849e-04
Loss = 4.4655e-01, PNorm = 53.4742, GNorm = 1.4149, lr_0 = 5.9808e-04
Loss = 3.9113e-01, PNorm = 53.4849, GNorm = 1.6148, lr_0 = 5.9767e-04
Loss = 4.4501e-01, PNorm = 53.4961, GNorm = 1.1544, lr_0 = 5.9726e-04
Loss = 4.2967e-01, PNorm = 53.5077, GNorm = 1.5519, lr_0 = 5.9685e-04
Loss = 4.6607e-01, PNorm = 53.5142, GNorm = 1.9483, lr_0 = 5.9644e-04
Loss = 4.3023e-01, PNorm = 53.5187, GNorm = 1.5494, lr_0 = 5.9603e-04
Loss = 4.2795e-01, PNorm = 53.5302, GNorm = 1.0250, lr_0 = 5.9562e-04
Loss = 4.4140e-01, PNorm = 53.5463, GNorm = 1.3081, lr_0 = 5.9521e-04
Loss = 3.6799e-01, PNorm = 53.5629, GNorm = 1.7125, lr_0 = 5.9481e-04
Loss = 4.6867e-01, PNorm = 53.5718, GNorm = 1.5454, lr_0 = 5.9440e-04
Loss = 4.4864e-01, PNorm = 53.5836, GNorm = 1.3010, lr_0 = 5.9399e-04
Loss = 4.3004e-01, PNorm = 53.5984, GNorm = 1.1921, lr_0 = 5.9358e-04
Loss = 4.2264e-01, PNorm = 53.6076, GNorm = 1.4783, lr_0 = 5.9318e-04
Loss = 4.0124e-01, PNorm = 53.6194, GNorm = 1.1925, lr_0 = 5.9277e-04
Loss = 4.3405e-01, PNorm = 53.6344, GNorm = 1.1703, lr_0 = 5.9236e-04
Loss = 4.1610e-01, PNorm = 53.6425, GNorm = 1.0444, lr_0 = 5.9196e-04
Loss = 3.9827e-01, PNorm = 53.6522, GNorm = 1.1005, lr_0 = 5.9155e-04
Loss = 4.3862e-01, PNorm = 53.6668, GNorm = 1.4649, lr_0 = 5.9115e-04
Loss = 4.3548e-01, PNorm = 53.6758, GNorm = 1.8563, lr_0 = 5.9074e-04
Loss = 4.0197e-01, PNorm = 53.6918, GNorm = 1.9536, lr_0 = 5.9034e-04
Loss = 4.4192e-01, PNorm = 53.6986, GNorm = 1.7520, lr_0 = 5.8993e-04
Loss = 4.1709e-01, PNorm = 53.7155, GNorm = 1.1726, lr_0 = 5.8953e-04
Loss = 4.1800e-01, PNorm = 53.7265, GNorm = 1.3030, lr_0 = 5.8913e-04
Loss = 4.8395e-01, PNorm = 53.7354, GNorm = 1.2201, lr_0 = 5.8872e-04
Loss = 4.6068e-01, PNorm = 53.7477, GNorm = 1.2733, lr_0 = 5.8832e-04
Loss = 4.3347e-01, PNorm = 53.7637, GNorm = 1.3607, lr_0 = 5.8792e-04
Loss = 4.7537e-01, PNorm = 53.7747, GNorm = 1.7187, lr_0 = 5.8751e-04
Loss = 4.2041e-01, PNorm = 53.7919, GNorm = 2.0204, lr_0 = 5.8711e-04
Loss = 4.0451e-01, PNorm = 53.7985, GNorm = 2.1314, lr_0 = 5.8671e-04
Loss = 4.1600e-01, PNorm = 53.8129, GNorm = 1.0396, lr_0 = 5.8631e-04
Loss = 4.0611e-01, PNorm = 53.8249, GNorm = 1.3318, lr_0 = 5.8591e-04
Loss = 4.5098e-01, PNorm = 53.8331, GNorm = 1.5256, lr_0 = 5.8550e-04
Loss = 3.5232e-01, PNorm = 53.8449, GNorm = 1.7665, lr_0 = 5.8510e-04
Loss = 4.3872e-01, PNorm = 53.8557, GNorm = 1.3434, lr_0 = 5.8470e-04
Loss = 3.6654e-01, PNorm = 53.8715, GNorm = 1.0474, lr_0 = 5.8430e-04
Loss = 3.9455e-01, PNorm = 53.8854, GNorm = 1.2480, lr_0 = 5.8390e-04
Loss = 4.7988e-01, PNorm = 53.8987, GNorm = 1.7715, lr_0 = 5.8350e-04
Loss = 4.7903e-01, PNorm = 53.9133, GNorm = 1.6092, lr_0 = 5.8310e-04
Loss = 4.4600e-01, PNorm = 53.9221, GNorm = 0.9439, lr_0 = 5.8270e-04
Loss = 3.8443e-01, PNorm = 53.9283, GNorm = 1.0426, lr_0 = 5.8230e-04
Loss = 4.6138e-01, PNorm = 53.9415, GNorm = 2.0549, lr_0 = 5.8190e-04
Loss = 3.9273e-01, PNorm = 53.9508, GNorm = 1.0707, lr_0 = 5.8151e-04
Loss = 4.4762e-01, PNorm = 53.9582, GNorm = 1.4664, lr_0 = 5.8111e-04
Loss = 4.2911e-01, PNorm = 53.9683, GNorm = 1.0755, lr_0 = 5.8071e-04
Loss = 4.9782e-01, PNorm = 53.9823, GNorm = 1.8292, lr_0 = 5.8031e-04
Loss = 4.1335e-01, PNorm = 53.9946, GNorm = 0.9384, lr_0 = 5.7991e-04
Loss = 4.0271e-01, PNorm = 54.0086, GNorm = 1.0915, lr_0 = 5.7952e-04
Loss = 4.2371e-01, PNorm = 54.0129, GNorm = 1.2115, lr_0 = 5.7912e-04
Loss = 4.3150e-01, PNorm = 54.0201, GNorm = 1.3114, lr_0 = 5.7872e-04
Loss = 4.6948e-01, PNorm = 54.0330, GNorm = 1.8033, lr_0 = 5.7833e-04
Loss = 3.3590e-01, PNorm = 54.0435, GNorm = 1.0762, lr_0 = 5.7793e-04
Loss = 4.3090e-01, PNorm = 54.0485, GNorm = 2.2799, lr_0 = 5.7753e-04
Loss = 4.0914e-01, PNorm = 54.0632, GNorm = 2.0438, lr_0 = 5.7714e-04
Loss = 4.0562e-01, PNorm = 54.0782, GNorm = 0.8814, lr_0 = 5.7674e-04
Loss = 4.3683e-01, PNorm = 54.0928, GNorm = 1.4283, lr_0 = 5.7635e-04
Loss = 4.4587e-01, PNorm = 54.1013, GNorm = 0.8319, lr_0 = 5.7595e-04
Loss = 4.1073e-01, PNorm = 54.1114, GNorm = 1.2022, lr_0 = 5.7556e-04
Loss = 4.7079e-01, PNorm = 54.1250, GNorm = 1.3839, lr_0 = 5.7516e-04
Loss = 3.9497e-01, PNorm = 54.1418, GNorm = 1.2245, lr_0 = 5.7477e-04
Loss = 4.0670e-01, PNorm = 54.1556, GNorm = 1.1593, lr_0 = 5.7438e-04
Loss = 4.4192e-01, PNorm = 54.1677, GNorm = 1.1699, lr_0 = 5.7398e-04
Loss = 4.4633e-01, PNorm = 54.1763, GNorm = 1.4661, lr_0 = 5.7359e-04
Loss = 4.3083e-01, PNorm = 54.1846, GNorm = 1.7710, lr_0 = 5.7320e-04
Loss = 4.7380e-01, PNorm = 54.1943, GNorm = 1.7716, lr_0 = 5.7280e-04
Loss = 4.3499e-01, PNorm = 54.2038, GNorm = 1.1027, lr_0 = 5.7241e-04
Loss = 5.3803e-01, PNorm = 54.2158, GNorm = 1.7939, lr_0 = 5.7202e-04
Loss = 3.9845e-01, PNorm = 54.2242, GNorm = 1.8119, lr_0 = 5.7163e-04
Loss = 5.0512e-01, PNorm = 54.2333, GNorm = 1.4535, lr_0 = 5.7124e-04
Loss = 4.3686e-01, PNorm = 54.2486, GNorm = 1.5826, lr_0 = 5.7084e-04
Loss = 3.8070e-01, PNorm = 54.2639, GNorm = 1.5244, lr_0 = 5.7045e-04
Loss = 4.5311e-01, PNorm = 54.2822, GNorm = 1.4376, lr_0 = 5.7006e-04
Loss = 4.2247e-01, PNorm = 54.2891, GNorm = 1.4961, lr_0 = 5.6967e-04
Loss = 4.2225e-01, PNorm = 54.2957, GNorm = 1.2579, lr_0 = 5.6928e-04
Loss = 4.2252e-01, PNorm = 54.2983, GNorm = 1.0335, lr_0 = 5.6889e-04
Loss = 4.1424e-01, PNorm = 54.3085, GNorm = 1.4929, lr_0 = 5.6850e-04
Loss = 4.0190e-01, PNorm = 54.3177, GNorm = 1.2795, lr_0 = 5.6811e-04
Loss = 4.2372e-01, PNorm = 54.3253, GNorm = 1.1945, lr_0 = 5.6772e-04
Loss = 4.2863e-01, PNorm = 54.3475, GNorm = 1.2825, lr_0 = 5.6733e-04
Loss = 4.4362e-01, PNorm = 54.3601, GNorm = 2.6630, lr_0 = 5.6695e-04
Loss = 4.8251e-01, PNorm = 54.3708, GNorm = 2.0086, lr_0 = 5.6656e-04
Loss = 4.5961e-01, PNorm = 54.3913, GNorm = 1.1668, lr_0 = 5.6617e-04
Loss = 3.8813e-01, PNorm = 54.4023, GNorm = 1.6117, lr_0 = 5.6578e-04
Loss = 4.3589e-01, PNorm = 54.4092, GNorm = 1.0303, lr_0 = 5.6539e-04
Loss = 4.4104e-01, PNorm = 54.4209, GNorm = 1.4850, lr_0 = 5.6501e-04
Loss = 4.1064e-01, PNorm = 54.4322, GNorm = 1.1909, lr_0 = 5.6462e-04
Loss = 3.7745e-01, PNorm = 54.4435, GNorm = 1.5452, lr_0 = 5.6423e-04
Loss = 4.0838e-01, PNorm = 54.4517, GNorm = 1.0355, lr_0 = 5.6385e-04
Loss = 4.2670e-01, PNorm = 54.4574, GNorm = 1.0966, lr_0 = 5.6346e-04
Loss = 4.3679e-01, PNorm = 54.4676, GNorm = 1.1828, lr_0 = 5.6307e-04
Loss = 5.3093e-01, PNorm = 54.4839, GNorm = 1.6271, lr_0 = 5.6269e-04
Loss = 4.6945e-01, PNorm = 54.4945, GNorm = 1.1927, lr_0 = 5.6230e-04
Validation mae = 0.116059
Epoch 9
Loss = 4.5592e-01, PNorm = 54.5011, GNorm = 1.2526, lr_0 = 5.6192e-04
Loss = 4.1099e-01, PNorm = 54.5118, GNorm = 1.2712, lr_0 = 5.6153e-04
Loss = 3.7594e-01, PNorm = 54.5216, GNorm = 1.1710, lr_0 = 5.6115e-04
Loss = 4.1220e-01, PNorm = 54.5257, GNorm = 1.6145, lr_0 = 5.6076e-04
Loss = 4.1873e-01, PNorm = 54.5331, GNorm = 1.8956, lr_0 = 5.6038e-04
Loss = 4.5512e-01, PNorm = 54.5439, GNorm = 1.2742, lr_0 = 5.6000e-04
Loss = 4.2196e-01, PNorm = 54.5550, GNorm = 2.4593, lr_0 = 5.5961e-04
Loss = 4.1869e-01, PNorm = 54.5600, GNorm = 1.0057, lr_0 = 5.5923e-04
Loss = 4.2871e-01, PNorm = 54.5686, GNorm = 2.7407, lr_0 = 5.5885e-04
Loss = 3.7501e-01, PNorm = 54.5762, GNorm = 1.2119, lr_0 = 5.5846e-04
Loss = 4.7206e-01, PNorm = 54.5825, GNorm = 1.6018, lr_0 = 5.5808e-04
Loss = 4.0960e-01, PNorm = 54.5951, GNorm = 1.6948, lr_0 = 5.5770e-04
Loss = 4.3209e-01, PNorm = 54.6071, GNorm = 1.2362, lr_0 = 5.5732e-04
Loss = 4.4717e-01, PNorm = 54.6229, GNorm = 1.5130, lr_0 = 5.5693e-04
Loss = 4.0010e-01, PNorm = 54.6374, GNorm = 1.4040, lr_0 = 5.5655e-04
Loss = 4.2989e-01, PNorm = 54.6485, GNorm = 1.8057, lr_0 = 5.5617e-04
Loss = 3.8548e-01, PNorm = 54.6593, GNorm = 1.1259, lr_0 = 5.5579e-04
Loss = 3.6678e-01, PNorm = 54.6700, GNorm = 1.3904, lr_0 = 5.5541e-04
Loss = 4.4974e-01, PNorm = 54.6810, GNorm = 2.0326, lr_0 = 5.5503e-04
Loss = 5.1937e-01, PNorm = 54.6874, GNorm = 2.4642, lr_0 = 5.5465e-04
Loss = 4.6333e-01, PNorm = 54.7004, GNorm = 2.1904, lr_0 = 5.5427e-04
Loss = 3.8590e-01, PNorm = 54.7104, GNorm = 1.4353, lr_0 = 5.5389e-04
Loss = 3.6532e-01, PNorm = 54.7248, GNorm = 0.9449, lr_0 = 5.5351e-04
Loss = 4.3157e-01, PNorm = 54.7381, GNorm = 1.0160, lr_0 = 5.5313e-04
Loss = 4.2123e-01, PNorm = 54.7453, GNorm = 1.3605, lr_0 = 5.5275e-04
Loss = 4.2135e-01, PNorm = 54.7575, GNorm = 1.5585, lr_0 = 5.5237e-04
Loss = 4.3270e-01, PNorm = 54.7693, GNorm = 1.4651, lr_0 = 5.5199e-04
Loss = 4.5078e-01, PNorm = 54.7797, GNorm = 1.5897, lr_0 = 5.5162e-04
Loss = 4.3512e-01, PNorm = 54.7826, GNorm = 1.4139, lr_0 = 5.5124e-04
Loss = 3.7416e-01, PNorm = 54.7923, GNorm = 1.2695, lr_0 = 5.5086e-04
Loss = 4.0251e-01, PNorm = 54.8010, GNorm = 1.3738, lr_0 = 5.5048e-04
Loss = 4.3606e-01, PNorm = 54.8065, GNorm = 1.2535, lr_0 = 5.5011e-04
Loss = 3.9879e-01, PNorm = 54.8149, GNorm = 1.2650, lr_0 = 5.4973e-04
Loss = 4.7699e-01, PNorm = 54.8248, GNorm = 1.4376, lr_0 = 5.4935e-04
Loss = 4.1827e-01, PNorm = 54.8316, GNorm = 1.7236, lr_0 = 5.4898e-04
Loss = 3.8741e-01, PNorm = 54.8385, GNorm = 1.2423, lr_0 = 5.4860e-04
Loss = 3.9812e-01, PNorm = 54.8495, GNorm = 2.2471, lr_0 = 5.4822e-04
Loss = 4.0208e-01, PNorm = 54.8562, GNorm = 1.2435, lr_0 = 5.4785e-04
Loss = 4.3515e-01, PNorm = 54.8634, GNorm = 1.6489, lr_0 = 5.4747e-04
Loss = 4.2393e-01, PNorm = 54.8700, GNorm = 1.9662, lr_0 = 5.4710e-04
Loss = 4.2131e-01, PNorm = 54.8788, GNorm = 1.7061, lr_0 = 5.4672e-04
Loss = 3.6781e-01, PNorm = 54.8878, GNorm = 1.8599, lr_0 = 5.4635e-04
Loss = 4.5534e-01, PNorm = 54.8998, GNorm = 1.5586, lr_0 = 5.4597e-04
Loss = 4.1032e-01, PNorm = 54.9064, GNorm = 1.4334, lr_0 = 5.4560e-04
Loss = 5.1044e-01, PNorm = 54.9195, GNorm = 1.3895, lr_0 = 5.4523e-04
Loss = 4.2050e-01, PNorm = 54.9428, GNorm = 1.8599, lr_0 = 5.4485e-04
Loss = 4.3500e-01, PNorm = 54.9520, GNorm = 1.4543, lr_0 = 5.4448e-04
Loss = 4.4125e-01, PNorm = 54.9752, GNorm = 0.8346, lr_0 = 5.4411e-04
Loss = 4.2813e-01, PNorm = 54.9884, GNorm = 1.1473, lr_0 = 5.4373e-04
Loss = 4.4852e-01, PNorm = 54.9936, GNorm = 1.1347, lr_0 = 5.4336e-04
Loss = 4.6433e-01, PNorm = 54.9990, GNorm = 1.4993, lr_0 = 5.4299e-04
Loss = 4.4518e-01, PNorm = 55.0096, GNorm = 1.5379, lr_0 = 5.4262e-04
Loss = 3.9724e-01, PNorm = 55.0226, GNorm = 2.4251, lr_0 = 5.4225e-04
Loss = 4.6339e-01, PNorm = 55.0270, GNorm = 1.7064, lr_0 = 5.4187e-04
Loss = 4.0455e-01, PNorm = 55.0317, GNorm = 1.6116, lr_0 = 5.4150e-04
Loss = 4.6453e-01, PNorm = 55.0423, GNorm = 1.8085, lr_0 = 5.4113e-04
Loss = 4.4651e-01, PNorm = 55.0518, GNorm = 1.2771, lr_0 = 5.4076e-04
Loss = 4.9411e-01, PNorm = 55.0676, GNorm = 0.9598, lr_0 = 5.4039e-04
Loss = 4.2499e-01, PNorm = 55.0803, GNorm = 1.5931, lr_0 = 5.4002e-04
Loss = 3.9595e-01, PNorm = 55.0928, GNorm = 1.0978, lr_0 = 5.3965e-04
Loss = 4.0027e-01, PNorm = 55.1055, GNorm = 0.9360, lr_0 = 5.3928e-04
Loss = 4.9616e-01, PNorm = 55.1047, GNorm = 1.7361, lr_0 = 5.3891e-04
Loss = 4.1264e-01, PNorm = 55.1201, GNorm = 1.0912, lr_0 = 5.3854e-04
Loss = 4.3809e-01, PNorm = 55.1289, GNorm = 1.1387, lr_0 = 5.3817e-04
Loss = 3.7178e-01, PNorm = 55.1377, GNorm = 1.4387, lr_0 = 5.3781e-04
Loss = 4.4473e-01, PNorm = 55.1452, GNorm = 1.3134, lr_0 = 5.3744e-04
Loss = 4.3977e-01, PNorm = 55.1549, GNorm = 1.3876, lr_0 = 5.3707e-04
Loss = 3.5500e-01, PNorm = 55.1696, GNorm = 1.2204, lr_0 = 5.3670e-04
Loss = 4.7298e-01, PNorm = 55.1775, GNorm = 1.7420, lr_0 = 5.3633e-04
Loss = 4.1803e-01, PNorm = 55.1858, GNorm = 1.1279, lr_0 = 5.3597e-04
Loss = 3.8952e-01, PNorm = 55.1978, GNorm = 1.0736, lr_0 = 5.3560e-04
Loss = 4.4729e-01, PNorm = 55.2119, GNorm = 1.5687, lr_0 = 5.3523e-04
Loss = 4.5695e-01, PNorm = 55.2265, GNorm = 1.3069, lr_0 = 5.3486e-04
Loss = 4.2298e-01, PNorm = 55.2399, GNorm = 1.0643, lr_0 = 5.3450e-04
Loss = 4.0547e-01, PNorm = 55.2477, GNorm = 1.1350, lr_0 = 5.3413e-04
Loss = 4.1324e-01, PNorm = 55.2594, GNorm = 1.0113, lr_0 = 5.3377e-04
Loss = 3.7868e-01, PNorm = 55.2689, GNorm = 1.2797, lr_0 = 5.3340e-04
Loss = 3.9900e-01, PNorm = 55.2798, GNorm = 1.7193, lr_0 = 5.3304e-04
Loss = 4.5076e-01, PNorm = 55.2931, GNorm = 2.4710, lr_0 = 5.3267e-04
Loss = 4.6038e-01, PNorm = 55.3080, GNorm = 1.2600, lr_0 = 5.3231e-04
Loss = 4.7956e-01, PNorm = 55.3116, GNorm = 1.5173, lr_0 = 5.3194e-04
Loss = 4.0484e-01, PNorm = 55.3192, GNorm = 1.9072, lr_0 = 5.3158e-04
Loss = 4.2507e-01, PNorm = 55.3260, GNorm = 1.2486, lr_0 = 5.3121e-04
Loss = 4.3083e-01, PNorm = 55.3390, GNorm = 1.9446, lr_0 = 5.3085e-04
Loss = 4.4778e-01, PNorm = 55.3440, GNorm = 2.1910, lr_0 = 5.3048e-04
Loss = 4.4112e-01, PNorm = 55.3533, GNorm = 1.3704, lr_0 = 5.3012e-04
Loss = 4.1293e-01, PNorm = 55.3628, GNorm = 0.9316, lr_0 = 5.2976e-04
Loss = 4.6947e-01, PNorm = 55.3725, GNorm = 2.1780, lr_0 = 5.2939e-04
Loss = 4.6582e-01, PNorm = 55.3851, GNorm = 1.0297, lr_0 = 5.2903e-04
Loss = 4.2648e-01, PNorm = 55.3920, GNorm = 1.6068, lr_0 = 5.2867e-04
Loss = 4.0383e-01, PNorm = 55.4032, GNorm = 0.9927, lr_0 = 5.2831e-04
Loss = 3.8871e-01, PNorm = 55.4071, GNorm = 1.6266, lr_0 = 5.2795e-04
Loss = 4.7884e-01, PNorm = 55.4124, GNorm = 1.6040, lr_0 = 5.2758e-04
Loss = 4.3081e-01, PNorm = 55.4206, GNorm = 1.6135, lr_0 = 5.2722e-04
Loss = 4.6518e-01, PNorm = 55.4313, GNorm = 2.1455, lr_0 = 5.2686e-04
Loss = 4.4019e-01, PNorm = 55.4437, GNorm = 1.3020, lr_0 = 5.2650e-04
Loss = 4.2562e-01, PNorm = 55.4553, GNorm = 1.1957, lr_0 = 5.2614e-04
Loss = 3.8103e-01, PNorm = 55.4644, GNorm = 1.1511, lr_0 = 5.2578e-04
Loss = 4.2479e-01, PNorm = 55.4667, GNorm = 2.1930, lr_0 = 5.2542e-04
Loss = 3.7310e-01, PNorm = 55.4719, GNorm = 2.6992, lr_0 = 5.2506e-04
Loss = 4.0332e-01, PNorm = 55.4836, GNorm = 1.9603, lr_0 = 5.2470e-04
Loss = 4.4659e-01, PNorm = 55.4956, GNorm = 1.4695, lr_0 = 5.2434e-04
Loss = 4.3345e-01, PNorm = 55.5026, GNorm = 1.5749, lr_0 = 5.2398e-04
Loss = 4.1992e-01, PNorm = 55.5148, GNorm = 1.2144, lr_0 = 5.2362e-04
Loss = 3.8109e-01, PNorm = 55.5235, GNorm = 1.3839, lr_0 = 5.2326e-04
Loss = 4.1462e-01, PNorm = 55.5313, GNorm = 1.3996, lr_0 = 5.2290e-04
Loss = 4.0395e-01, PNorm = 55.5399, GNorm = 1.0456, lr_0 = 5.2255e-04
Loss = 3.6450e-01, PNorm = 55.5532, GNorm = 1.2609, lr_0 = 5.2219e-04
Loss = 4.3984e-01, PNorm = 55.5661, GNorm = 2.6479, lr_0 = 5.2183e-04
Loss = 4.3244e-01, PNorm = 55.5704, GNorm = 1.3915, lr_0 = 5.2147e-04
Loss = 4.1928e-01, PNorm = 55.5809, GNorm = 1.6699, lr_0 = 5.2112e-04
Loss = 3.8012e-01, PNorm = 55.5952, GNorm = 1.1042, lr_0 = 5.2076e-04
Loss = 4.2918e-01, PNorm = 55.6014, GNorm = 1.3675, lr_0 = 5.2040e-04
Loss = 4.0591e-01, PNorm = 55.6116, GNorm = 1.3011, lr_0 = 5.2005e-04
Loss = 4.3147e-01, PNorm = 55.6216, GNorm = 1.0065, lr_0 = 5.1969e-04
Loss = 4.5241e-01, PNorm = 55.6265, GNorm = 1.1343, lr_0 = 5.1933e-04
Loss = 4.3151e-01, PNorm = 55.6384, GNorm = 1.7734, lr_0 = 5.1898e-04
Loss = 4.5370e-01, PNorm = 55.6549, GNorm = 2.0937, lr_0 = 5.1862e-04
Loss = 4.3504e-01, PNorm = 55.6695, GNorm = 1.9635, lr_0 = 5.1827e-04
Loss = 4.1446e-01, PNorm = 55.6809, GNorm = 0.8705, lr_0 = 5.1791e-04
Validation mae = 0.115629
Epoch 10
Loss = 4.3233e-01, PNorm = 55.6914, GNorm = 1.7009, lr_0 = 5.1756e-04
Loss = 4.9180e-01, PNorm = 55.7023, GNorm = 1.7314, lr_0 = 5.1720e-04
Loss = 4.1108e-01, PNorm = 55.7120, GNorm = 1.4990, lr_0 = 5.1685e-04
Loss = 4.0046e-01, PNorm = 55.7204, GNorm = 1.1870, lr_0 = 5.1649e-04
Loss = 3.6294e-01, PNorm = 55.7278, GNorm = 1.2941, lr_0 = 5.1614e-04
Loss = 4.3647e-01, PNorm = 55.7350, GNorm = 2.3879, lr_0 = 5.1579e-04
Loss = 4.7076e-01, PNorm = 55.7490, GNorm = 1.5998, lr_0 = 5.1543e-04
Loss = 4.3609e-01, PNorm = 55.7605, GNorm = 1.3824, lr_0 = 5.1508e-04
Loss = 4.0371e-01, PNorm = 55.7755, GNorm = 1.1841, lr_0 = 5.1473e-04
Loss = 4.3144e-01, PNorm = 55.7840, GNorm = 1.2963, lr_0 = 5.1437e-04
Loss = 4.4143e-01, PNorm = 55.8003, GNorm = 1.5583, lr_0 = 5.1402e-04
Loss = 3.6713e-01, PNorm = 55.8130, GNorm = 1.5227, lr_0 = 5.1367e-04
Loss = 3.7394e-01, PNorm = 55.8193, GNorm = 1.3128, lr_0 = 5.1332e-04
Loss = 4.0889e-01, PNorm = 55.8238, GNorm = 1.5521, lr_0 = 5.1297e-04
Loss = 3.8737e-01, PNorm = 55.8256, GNorm = 1.1375, lr_0 = 5.1262e-04
Loss = 4.0946e-01, PNorm = 55.8356, GNorm = 0.8249, lr_0 = 5.1226e-04
Loss = 3.8729e-01, PNorm = 55.8471, GNorm = 1.2379, lr_0 = 5.1191e-04
Loss = 4.3608e-01, PNorm = 55.8572, GNorm = 2.2739, lr_0 = 5.1156e-04
Loss = 4.3970e-01, PNorm = 55.8615, GNorm = 1.4392, lr_0 = 5.1121e-04
Loss = 3.9409e-01, PNorm = 55.8695, GNorm = 1.1119, lr_0 = 5.1086e-04
Loss = 4.4159e-01, PNorm = 55.8779, GNorm = 1.6299, lr_0 = 5.1051e-04
Loss = 3.8350e-01, PNorm = 55.8853, GNorm = 1.5338, lr_0 = 5.1016e-04
Loss = 3.9402e-01, PNorm = 55.8987, GNorm = 1.0320, lr_0 = 5.0981e-04
Loss = 4.0577e-01, PNorm = 55.9053, GNorm = 0.9781, lr_0 = 5.0946e-04
Loss = 4.0610e-01, PNorm = 55.9145, GNorm = 1.6797, lr_0 = 5.0911e-04
Loss = 4.1296e-01, PNorm = 55.9230, GNorm = 1.1943, lr_0 = 5.0877e-04
Loss = 3.6507e-01, PNorm = 55.9381, GNorm = 1.4477, lr_0 = 5.0842e-04
Loss = 4.0847e-01, PNorm = 55.9439, GNorm = 1.2444, lr_0 = 5.0807e-04
Loss = 3.6046e-01, PNorm = 55.9483, GNorm = 1.4359, lr_0 = 5.0772e-04
Loss = 3.5160e-01, PNorm = 55.9550, GNorm = 0.8749, lr_0 = 5.0737e-04
Loss = 4.5123e-01, PNorm = 55.9649, GNorm = 1.3611, lr_0 = 5.0703e-04
Loss = 4.2588e-01, PNorm = 55.9760, GNorm = 1.7697, lr_0 = 5.0668e-04
Loss = 3.6618e-01, PNorm = 55.9841, GNorm = 1.1277, lr_0 = 5.0633e-04
Loss = 4.0774e-01, PNorm = 55.9955, GNorm = 1.4994, lr_0 = 5.0598e-04
Loss = 4.3162e-01, PNorm = 56.0074, GNorm = 1.1272, lr_0 = 5.0564e-04
Loss = 4.1818e-01, PNorm = 56.0163, GNorm = 1.7444, lr_0 = 5.0529e-04
Loss = 3.9725e-01, PNorm = 56.0197, GNorm = 1.0465, lr_0 = 5.0494e-04
Loss = 3.5362e-01, PNorm = 56.0255, GNorm = 1.1761, lr_0 = 5.0460e-04
Loss = 4.0080e-01, PNorm = 56.0367, GNorm = 2.1268, lr_0 = 5.0425e-04
Loss = 3.7975e-01, PNorm = 56.0469, GNorm = 1.3479, lr_0 = 5.0391e-04
Loss = 4.1688e-01, PNorm = 56.0548, GNorm = 1.8326, lr_0 = 5.0356e-04
Loss = 4.4301e-01, PNorm = 56.0647, GNorm = 1.3580, lr_0 = 5.0322e-04
Loss = 3.4869e-01, PNorm = 56.0717, GNorm = 0.9123, lr_0 = 5.0287e-04
Loss = 4.3656e-01, PNorm = 56.0821, GNorm = 1.3184, lr_0 = 5.0253e-04
Loss = 4.2172e-01, PNorm = 56.0912, GNorm = 1.7255, lr_0 = 5.0218e-04
Loss = 4.0646e-01, PNorm = 56.1030, GNorm = 1.3844, lr_0 = 5.0184e-04
Loss = 4.1552e-01, PNorm = 56.1078, GNorm = 1.6622, lr_0 = 5.0150e-04
Loss = 4.2902e-01, PNorm = 56.1129, GNorm = 2.5948, lr_0 = 5.0115e-04
Loss = 3.5569e-01, PNorm = 56.1231, GNorm = 1.0596, lr_0 = 5.0081e-04
Loss = 4.3323e-01, PNorm = 56.1297, GNorm = 1.6510, lr_0 = 5.0047e-04
Loss = 3.9955e-01, PNorm = 56.1391, GNorm = 2.4657, lr_0 = 5.0012e-04
Loss = 4.0506e-01, PNorm = 56.1401, GNorm = 1.3229, lr_0 = 4.9978e-04
Loss = 4.2689e-01, PNorm = 56.1535, GNorm = 1.0808, lr_0 = 4.9944e-04
Loss = 4.0710e-01, PNorm = 56.1669, GNorm = 1.3374, lr_0 = 4.9910e-04
Loss = 4.6357e-01, PNorm = 56.1812, GNorm = 1.5606, lr_0 = 4.9875e-04
Loss = 3.3488e-01, PNorm = 56.1879, GNorm = 1.5435, lr_0 = 4.9841e-04
Loss = 4.4760e-01, PNorm = 56.2006, GNorm = 1.2027, lr_0 = 4.9807e-04
Loss = 4.2737e-01, PNorm = 56.2164, GNorm = 0.9697, lr_0 = 4.9773e-04
Loss = 4.8697e-01, PNorm = 56.2283, GNorm = 1.1901, lr_0 = 4.9739e-04
Loss = 4.8610e-01, PNorm = 56.2392, GNorm = 1.9967, lr_0 = 4.9705e-04
Loss = 4.3404e-01, PNorm = 56.2521, GNorm = 1.5025, lr_0 = 4.9671e-04
Loss = 4.2949e-01, PNorm = 56.2631, GNorm = 1.6511, lr_0 = 4.9637e-04
Loss = 4.4175e-01, PNorm = 56.2709, GNorm = 1.6150, lr_0 = 4.9603e-04
Loss = 3.6743e-01, PNorm = 56.2835, GNorm = 1.2842, lr_0 = 4.9569e-04
Loss = 4.6160e-01, PNorm = 56.2914, GNorm = 1.5147, lr_0 = 4.9535e-04
Loss = 3.7802e-01, PNorm = 56.2983, GNorm = 0.8277, lr_0 = 4.9501e-04
Loss = 4.1483e-01, PNorm = 56.3025, GNorm = 1.1662, lr_0 = 4.9467e-04
Loss = 3.6485e-01, PNorm = 56.3128, GNorm = 1.5916, lr_0 = 4.9433e-04
Loss = 4.0758e-01, PNorm = 56.3194, GNorm = 2.7318, lr_0 = 4.9399e-04
Loss = 3.8486e-01, PNorm = 56.3265, GNorm = 1.4182, lr_0 = 4.9365e-04
Loss = 4.3880e-01, PNorm = 56.3290, GNorm = 1.5328, lr_0 = 4.9332e-04
Loss = 4.6886e-01, PNorm = 56.3382, GNorm = 1.2398, lr_0 = 4.9298e-04
Loss = 4.3859e-01, PNorm = 56.3542, GNorm = 1.4309, lr_0 = 4.9264e-04
Loss = 4.6494e-01, PNorm = 56.3647, GNorm = 1.8952, lr_0 = 4.9230e-04
Loss = 4.3059e-01, PNorm = 56.3748, GNorm = 1.1315, lr_0 = 4.9197e-04
Loss = 3.8500e-01, PNorm = 56.3931, GNorm = 1.1686, lr_0 = 4.9163e-04
Loss = 4.5395e-01, PNorm = 56.4014, GNorm = 1.1804, lr_0 = 4.9129e-04
Loss = 3.8145e-01, PNorm = 56.4101, GNorm = 1.6339, lr_0 = 4.9095e-04
Loss = 4.5227e-01, PNorm = 56.4156, GNorm = 1.1140, lr_0 = 4.9062e-04
Loss = 4.4380e-01, PNorm = 56.4217, GNorm = 1.4026, lr_0 = 4.9028e-04
Loss = 3.8677e-01, PNorm = 56.4310, GNorm = 1.3032, lr_0 = 4.8995e-04
Loss = 4.3265e-01, PNorm = 56.4345, GNorm = 1.5913, lr_0 = 4.8961e-04
Loss = 4.5573e-01, PNorm = 56.4396, GNorm = 1.8018, lr_0 = 4.8928e-04
Loss = 4.1549e-01, PNorm = 56.4472, GNorm = 1.8661, lr_0 = 4.8894e-04
Loss = 4.3661e-01, PNorm = 56.4541, GNorm = 1.0755, lr_0 = 4.8861e-04
Loss = 4.1910e-01, PNorm = 56.4614, GNorm = 1.3987, lr_0 = 4.8827e-04
Loss = 4.4469e-01, PNorm = 56.4691, GNorm = 1.8390, lr_0 = 4.8794e-04
Loss = 4.7448e-01, PNorm = 56.4793, GNorm = 2.3584, lr_0 = 4.8760e-04
Loss = 3.9349e-01, PNorm = 56.4793, GNorm = 1.4014, lr_0 = 4.8727e-04
Loss = 4.0008e-01, PNorm = 56.4885, GNorm = 1.2880, lr_0 = 4.8693e-04
Loss = 4.5251e-01, PNorm = 56.4947, GNorm = 0.9954, lr_0 = 4.8660e-04
Loss = 4.4841e-01, PNorm = 56.5063, GNorm = 1.6361, lr_0 = 4.8627e-04
Loss = 3.6532e-01, PNorm = 56.5135, GNorm = 1.1050, lr_0 = 4.8593e-04
Loss = 3.6763e-01, PNorm = 56.5194, GNorm = 1.2698, lr_0 = 4.8560e-04
Loss = 3.5089e-01, PNorm = 56.5277, GNorm = 1.2444, lr_0 = 4.8527e-04
Loss = 4.3305e-01, PNorm = 56.5352, GNorm = 1.3280, lr_0 = 4.8494e-04
Loss = 4.1952e-01, PNorm = 56.5458, GNorm = 1.3295, lr_0 = 4.8460e-04
Loss = 4.1309e-01, PNorm = 56.5550, GNorm = 1.0190, lr_0 = 4.8427e-04
Loss = 4.1969e-01, PNorm = 56.5665, GNorm = 1.3965, lr_0 = 4.8394e-04
Loss = 4.0644e-01, PNorm = 56.5737, GNorm = 1.1579, lr_0 = 4.8361e-04
Loss = 4.3865e-01, PNorm = 56.5825, GNorm = 1.5997, lr_0 = 4.8328e-04
Loss = 4.4431e-01, PNorm = 56.5901, GNorm = 2.3310, lr_0 = 4.8295e-04
Loss = 4.1408e-01, PNorm = 56.6057, GNorm = 1.2010, lr_0 = 4.8262e-04
Loss = 4.0356e-01, PNorm = 56.6155, GNorm = 0.9099, lr_0 = 4.8228e-04
Loss = 3.6393e-01, PNorm = 56.6287, GNorm = 1.0935, lr_0 = 4.8195e-04
Loss = 4.2350e-01, PNorm = 56.6340, GNorm = 1.1013, lr_0 = 4.8162e-04
Loss = 3.7371e-01, PNorm = 56.6391, GNorm = 1.8909, lr_0 = 4.8129e-04
Loss = 3.8502e-01, PNorm = 56.6441, GNorm = 1.7137, lr_0 = 4.8096e-04
Loss = 4.4246e-01, PNorm = 56.6438, GNorm = 1.2232, lr_0 = 4.8064e-04
Loss = 4.0315e-01, PNorm = 56.6498, GNorm = 2.0350, lr_0 = 4.8031e-04
Loss = 4.3365e-01, PNorm = 56.6608, GNorm = 1.6562, lr_0 = 4.7998e-04
Loss = 4.2046e-01, PNorm = 56.6690, GNorm = 1.4336, lr_0 = 4.7965e-04
Loss = 4.0540e-01, PNorm = 56.6809, GNorm = 2.0398, lr_0 = 4.7932e-04
Loss = 4.2349e-01, PNorm = 56.6926, GNorm = 1.7106, lr_0 = 4.7899e-04
Loss = 4.2100e-01, PNorm = 56.7026, GNorm = 0.9059, lr_0 = 4.7866e-04
Loss = 3.5152e-01, PNorm = 56.7122, GNorm = 2.0163, lr_0 = 4.7833e-04
Loss = 4.6906e-01, PNorm = 56.7204, GNorm = 1.2049, lr_0 = 4.7801e-04
Loss = 4.3050e-01, PNorm = 56.7249, GNorm = 1.8485, lr_0 = 4.7768e-04
Loss = 4.0026e-01, PNorm = 56.7320, GNorm = 1.4260, lr_0 = 4.7735e-04
Loss = 4.1937e-01, PNorm = 56.7425, GNorm = 1.3875, lr_0 = 4.7703e-04
Validation mae = 0.116785
Epoch 11
Loss = 4.4786e-01, PNorm = 56.7521, GNorm = 2.9095, lr_0 = 4.7670e-04
Loss = 4.2830e-01, PNorm = 56.7641, GNorm = 1.1334, lr_0 = 4.7637e-04
Loss = 4.1232e-01, PNorm = 56.7751, GNorm = 1.5036, lr_0 = 4.7605e-04
Loss = 4.2147e-01, PNorm = 56.7916, GNorm = 1.6251, lr_0 = 4.7572e-04
Loss = 3.7190e-01, PNorm = 56.7990, GNorm = 1.3280, lr_0 = 4.7539e-04
Loss = 4.2405e-01, PNorm = 56.8098, GNorm = 1.3343, lr_0 = 4.7507e-04
Loss = 3.8067e-01, PNorm = 56.8194, GNorm = 1.9421, lr_0 = 4.7474e-04
Loss = 3.9575e-01, PNorm = 56.8286, GNorm = 1.6716, lr_0 = 4.7442e-04
Loss = 4.3909e-01, PNorm = 56.8364, GNorm = 1.4342, lr_0 = 4.7409e-04
Loss = 4.2440e-01, PNorm = 56.8382, GNorm = 1.4046, lr_0 = 4.7377e-04
Loss = 3.8359e-01, PNorm = 56.8450, GNorm = 1.2343, lr_0 = 4.7344e-04
Loss = 4.4931e-01, PNorm = 56.8562, GNorm = 1.2994, lr_0 = 4.7312e-04
Loss = 4.4243e-01, PNorm = 56.8639, GNorm = 2.3982, lr_0 = 4.7279e-04
Loss = 4.2933e-01, PNorm = 56.8647, GNorm = 2.0470, lr_0 = 4.7247e-04
Loss = 4.3080e-01, PNorm = 56.8753, GNorm = 1.6885, lr_0 = 4.7215e-04
Loss = 3.8070e-01, PNorm = 56.8858, GNorm = 0.8947, lr_0 = 4.7182e-04
Loss = 3.8372e-01, PNorm = 56.8917, GNorm = 1.3854, lr_0 = 4.7150e-04
Loss = 4.5945e-01, PNorm = 56.8976, GNorm = 1.5815, lr_0 = 4.7118e-04
Loss = 4.0604e-01, PNorm = 56.9043, GNorm = 1.3441, lr_0 = 4.7085e-04
Loss = 4.0016e-01, PNorm = 56.9104, GNorm = 1.5200, lr_0 = 4.7053e-04
Loss = 4.4440e-01, PNorm = 56.9236, GNorm = 1.0467, lr_0 = 4.7021e-04
Loss = 4.5555e-01, PNorm = 56.9360, GNorm = 2.2122, lr_0 = 4.6989e-04
Loss = 3.6540e-01, PNorm = 56.9460, GNorm = 1.2789, lr_0 = 4.6957e-04
Loss = 4.2380e-01, PNorm = 56.9457, GNorm = 2.1098, lr_0 = 4.6924e-04
Loss = 4.0484e-01, PNorm = 56.9501, GNorm = 0.9174, lr_0 = 4.6892e-04
Loss = 4.0158e-01, PNorm = 56.9604, GNorm = 1.3032, lr_0 = 4.6860e-04
Loss = 3.8590e-01, PNorm = 56.9678, GNorm = 2.0149, lr_0 = 4.6828e-04
Loss = 4.2688e-01, PNorm = 56.9807, GNorm = 1.4554, lr_0 = 4.6796e-04
Loss = 3.9922e-01, PNorm = 56.9912, GNorm = 1.1638, lr_0 = 4.6764e-04
Loss = 4.9312e-01, PNorm = 56.9993, GNorm = 1.2592, lr_0 = 4.6732e-04
Loss = 3.2196e-01, PNorm = 57.0103, GNorm = 1.2895, lr_0 = 4.6700e-04
Loss = 4.0527e-01, PNorm = 57.0147, GNorm = 1.5984, lr_0 = 4.6668e-04
Loss = 4.1065e-01, PNorm = 57.0252, GNorm = 1.4539, lr_0 = 4.6636e-04
Loss = 4.0217e-01, PNorm = 57.0325, GNorm = 1.2926, lr_0 = 4.6604e-04
Loss = 4.3790e-01, PNorm = 57.0419, GNorm = 1.9965, lr_0 = 4.6572e-04
Loss = 4.4951e-01, PNorm = 57.0551, GNorm = 0.9301, lr_0 = 4.6540e-04
Loss = 4.0917e-01, PNorm = 57.0604, GNorm = 1.4792, lr_0 = 4.6508e-04
Loss = 4.5804e-01, PNorm = 57.0699, GNorm = 1.9006, lr_0 = 4.6476e-04
Loss = 3.8669e-01, PNorm = 57.0779, GNorm = 1.1319, lr_0 = 4.6445e-04
Loss = 4.1615e-01, PNorm = 57.0809, GNorm = 1.1050, lr_0 = 4.6413e-04
Loss = 4.7879e-01, PNorm = 57.0928, GNorm = 1.7117, lr_0 = 4.6381e-04
Loss = 3.8716e-01, PNorm = 57.1060, GNorm = 1.2554, lr_0 = 4.6349e-04
Loss = 4.5089e-01, PNorm = 57.1158, GNorm = 1.8009, lr_0 = 4.6317e-04
Loss = 4.9887e-01, PNorm = 57.1255, GNorm = 2.0266, lr_0 = 4.6286e-04
Loss = 4.0723e-01, PNorm = 57.1320, GNorm = 1.4332, lr_0 = 4.6254e-04
Loss = 3.4058e-01, PNorm = 57.1368, GNorm = 1.1442, lr_0 = 4.6222e-04
Loss = 3.9976e-01, PNorm = 57.1471, GNorm = 1.4312, lr_0 = 4.6191e-04
Loss = 4.4952e-01, PNorm = 57.1611, GNorm = 1.8720, lr_0 = 4.6159e-04
Loss = 4.2013e-01, PNorm = 57.1643, GNorm = 1.3507, lr_0 = 4.6127e-04
Loss = 4.5205e-01, PNorm = 57.1651, GNorm = 1.7628, lr_0 = 4.6096e-04
Loss = 4.4138e-01, PNorm = 57.1759, GNorm = 1.0029, lr_0 = 4.6064e-04
Loss = 3.9081e-01, PNorm = 57.1884, GNorm = 1.3051, lr_0 = 4.6033e-04
Loss = 3.8002e-01, PNorm = 57.1976, GNorm = 1.3428, lr_0 = 4.6001e-04
Loss = 4.4367e-01, PNorm = 57.2034, GNorm = 1.4275, lr_0 = 4.5970e-04
Loss = 4.3090e-01, PNorm = 57.2118, GNorm = 1.5301, lr_0 = 4.5938e-04
Loss = 4.4217e-01, PNorm = 57.2225, GNorm = 1.4866, lr_0 = 4.5907e-04
Loss = 3.9690e-01, PNorm = 57.2282, GNorm = 1.0244, lr_0 = 4.5875e-04
Loss = 4.1557e-01, PNorm = 57.2369, GNorm = 1.1297, lr_0 = 4.5844e-04
Loss = 4.2965e-01, PNorm = 57.2451, GNorm = 1.9840, lr_0 = 4.5812e-04
Loss = 3.9583e-01, PNorm = 57.2504, GNorm = 1.8671, lr_0 = 4.5781e-04
Loss = 4.0215e-01, PNorm = 57.2558, GNorm = 1.7214, lr_0 = 4.5750e-04
Loss = 4.5538e-01, PNorm = 57.2574, GNorm = 1.1391, lr_0 = 4.5718e-04
Loss = 3.4474e-01, PNorm = 57.2652, GNorm = 1.5843, lr_0 = 4.5687e-04
Loss = 4.5129e-01, PNorm = 57.2698, GNorm = 1.1651, lr_0 = 4.5656e-04
Loss = 4.5563e-01, PNorm = 57.2724, GNorm = 1.0850, lr_0 = 4.5624e-04
Loss = 3.5520e-01, PNorm = 57.2827, GNorm = 1.3260, lr_0 = 4.5593e-04
Loss = 3.9126e-01, PNorm = 57.2883, GNorm = 1.2330, lr_0 = 4.5562e-04
Loss = 3.9351e-01, PNorm = 57.3043, GNorm = 1.6480, lr_0 = 4.5531e-04
Loss = 4.3576e-01, PNorm = 57.3108, GNorm = 1.2129, lr_0 = 4.5499e-04
Loss = 4.2733e-01, PNorm = 57.3235, GNorm = 1.7132, lr_0 = 4.5468e-04
Loss = 3.9246e-01, PNorm = 57.3386, GNorm = 0.9465, lr_0 = 4.5437e-04
Loss = 4.2578e-01, PNorm = 57.3484, GNorm = 1.4136, lr_0 = 4.5406e-04
Loss = 3.9064e-01, PNorm = 57.3521, GNorm = 1.2683, lr_0 = 4.5375e-04
Loss = 3.8327e-01, PNorm = 57.3532, GNorm = 1.2797, lr_0 = 4.5344e-04
Loss = 4.1981e-01, PNorm = 57.3565, GNorm = 1.7023, lr_0 = 4.5313e-04
Loss = 4.1500e-01, PNorm = 57.3594, GNorm = 2.4716, lr_0 = 4.5282e-04
Loss = 4.0165e-01, PNorm = 57.3715, GNorm = 1.6713, lr_0 = 4.5251e-04
Loss = 4.0959e-01, PNorm = 57.3753, GNorm = 1.1760, lr_0 = 4.5220e-04
Loss = 4.4001e-01, PNorm = 57.3798, GNorm = 1.2468, lr_0 = 4.5189e-04
Loss = 3.8998e-01, PNorm = 57.3902, GNorm = 1.4611, lr_0 = 4.5158e-04
Loss = 4.0863e-01, PNorm = 57.4018, GNorm = 1.1582, lr_0 = 4.5127e-04
Loss = 4.6349e-01, PNorm = 57.4027, GNorm = 1.5114, lr_0 = 4.5096e-04
Loss = 3.6992e-01, PNorm = 57.4123, GNorm = 1.0357, lr_0 = 4.5065e-04
Loss = 4.1083e-01, PNorm = 57.4223, GNorm = 1.3698, lr_0 = 4.5034e-04
Loss = 4.0055e-01, PNorm = 57.4330, GNorm = 1.8979, lr_0 = 4.5003e-04
Loss = 4.0459e-01, PNorm = 57.4388, GNorm = 1.5329, lr_0 = 4.4972e-04
Loss = 3.7761e-01, PNorm = 57.4503, GNorm = 1.1271, lr_0 = 4.4942e-04
Loss = 3.8934e-01, PNorm = 57.4609, GNorm = 1.1534, lr_0 = 4.4911e-04
Loss = 4.4318e-01, PNorm = 57.4710, GNorm = 1.4686, lr_0 = 4.4880e-04
Loss = 4.3183e-01, PNorm = 57.4763, GNorm = 1.4565, lr_0 = 4.4849e-04
Loss = 3.9442e-01, PNorm = 57.4783, GNorm = 1.0317, lr_0 = 4.4819e-04
Loss = 4.0519e-01, PNorm = 57.4857, GNorm = 1.4961, lr_0 = 4.4788e-04
Loss = 3.5952e-01, PNorm = 57.4933, GNorm = 1.1448, lr_0 = 4.4757e-04
Loss = 4.3095e-01, PNorm = 57.4987, GNorm = 0.9360, lr_0 = 4.4727e-04
Loss = 4.1631e-01, PNorm = 57.5075, GNorm = 1.6573, lr_0 = 4.4696e-04
Loss = 4.0464e-01, PNorm = 57.5214, GNorm = 1.5543, lr_0 = 4.4665e-04
Loss = 4.0202e-01, PNorm = 57.5225, GNorm = 1.8216, lr_0 = 4.4635e-04
Loss = 3.6086e-01, PNorm = 57.5350, GNorm = 1.5744, lr_0 = 4.4604e-04
Loss = 3.7735e-01, PNorm = 57.5462, GNorm = 1.1360, lr_0 = 4.4574e-04
Loss = 3.7835e-01, PNorm = 57.5582, GNorm = 2.3684, lr_0 = 4.4543e-04
Loss = 4.4508e-01, PNorm = 57.5638, GNorm = 1.5495, lr_0 = 4.4513e-04
Loss = 3.7741e-01, PNorm = 57.5729, GNorm = 1.2975, lr_0 = 4.4482e-04
Loss = 3.5138e-01, PNorm = 57.5767, GNorm = 1.6367, lr_0 = 4.4452e-04
Loss = 3.9817e-01, PNorm = 57.5818, GNorm = 1.4706, lr_0 = 4.4421e-04
Loss = 3.9087e-01, PNorm = 57.5841, GNorm = 1.5617, lr_0 = 4.4391e-04
Loss = 3.4984e-01, PNorm = 57.5883, GNorm = 0.8053, lr_0 = 4.4360e-04
Loss = 4.6943e-01, PNorm = 57.5856, GNorm = 2.1388, lr_0 = 4.4330e-04
Loss = 3.7861e-01, PNorm = 57.5963, GNorm = 1.4263, lr_0 = 4.4299e-04
Loss = 3.9206e-01, PNorm = 57.6040, GNorm = 1.3601, lr_0 = 4.4269e-04
Loss = 3.8035e-01, PNorm = 57.6078, GNorm = 1.2498, lr_0 = 4.4239e-04
Loss = 3.8520e-01, PNorm = 57.6137, GNorm = 1.3170, lr_0 = 4.4209e-04
Loss = 4.0331e-01, PNorm = 57.6253, GNorm = 1.2368, lr_0 = 4.4178e-04
Loss = 3.8348e-01, PNorm = 57.6318, GNorm = 1.6607, lr_0 = 4.4148e-04
Loss = 4.3344e-01, PNorm = 57.6361, GNorm = 1.5867, lr_0 = 4.4118e-04
Loss = 4.5190e-01, PNorm = 57.6440, GNorm = 1.4425, lr_0 = 4.4088e-04
Loss = 3.4381e-01, PNorm = 57.6559, GNorm = 1.0432, lr_0 = 4.4057e-04
Loss = 4.2863e-01, PNorm = 57.6661, GNorm = 1.5001, lr_0 = 4.4027e-04
Loss = 4.2769e-01, PNorm = 57.6744, GNorm = 1.0379, lr_0 = 4.3997e-04
Loss = 3.8008e-01, PNorm = 57.6848, GNorm = 1.7624, lr_0 = 4.3967e-04
Loss = 3.8868e-01, PNorm = 57.6877, GNorm = 2.0452, lr_0 = 4.3937e-04
Validation mae = 0.116119
Epoch 12
Loss = 5.7094e-01, PNorm = 57.6934, GNorm = 1.1885, lr_0 = 4.3907e-04
Loss = 4.1973e-01, PNorm = 57.7002, GNorm = 1.5780, lr_0 = 4.3877e-04
Loss = 4.7849e-01, PNorm = 57.7101, GNorm = 1.8001, lr_0 = 4.3846e-04
Loss = 4.1119e-01, PNorm = 57.7160, GNorm = 1.1801, lr_0 = 4.3816e-04
Loss = 3.8967e-01, PNorm = 57.7221, GNorm = 1.6315, lr_0 = 4.3786e-04
Loss = 3.9968e-01, PNorm = 57.7310, GNorm = 1.4397, lr_0 = 4.3756e-04
Loss = 3.7454e-01, PNorm = 57.7382, GNorm = 1.5502, lr_0 = 4.3726e-04
Loss = 4.1014e-01, PNorm = 57.7440, GNorm = 1.3712, lr_0 = 4.3696e-04
Loss = 3.7013e-01, PNorm = 57.7484, GNorm = 1.1616, lr_0 = 4.3667e-04
Loss = 4.2392e-01, PNorm = 57.7571, GNorm = 1.3473, lr_0 = 4.3637e-04
Loss = 4.0941e-01, PNorm = 57.7682, GNorm = 1.4031, lr_0 = 4.3607e-04
Loss = 3.6159e-01, PNorm = 57.7812, GNorm = 1.1941, lr_0 = 4.3577e-04
Loss = 3.4503e-01, PNorm = 57.7858, GNorm = 1.1035, lr_0 = 4.3547e-04
Loss = 3.7671e-01, PNorm = 57.7904, GNorm = 1.7090, lr_0 = 4.3517e-04
Loss = 3.9721e-01, PNorm = 57.7967, GNorm = 1.3010, lr_0 = 4.3487e-04
Loss = 3.7868e-01, PNorm = 57.8039, GNorm = 1.2964, lr_0 = 4.3458e-04
Loss = 4.0480e-01, PNorm = 57.8097, GNorm = 1.2349, lr_0 = 4.3428e-04
Loss = 3.9319e-01, PNorm = 57.8171, GNorm = 1.3679, lr_0 = 4.3398e-04
Loss = 3.5551e-01, PNorm = 57.8217, GNorm = 1.8367, lr_0 = 4.3368e-04
Loss = 3.6795e-01, PNorm = 57.8256, GNorm = 1.4943, lr_0 = 4.3339e-04
Loss = 4.0271e-01, PNorm = 57.8310, GNorm = 1.4746, lr_0 = 4.3309e-04
Loss = 4.2008e-01, PNorm = 57.8428, GNorm = 1.9627, lr_0 = 4.3279e-04
Loss = 4.1104e-01, PNorm = 57.8490, GNorm = 1.3683, lr_0 = 4.3250e-04
Loss = 4.0119e-01, PNorm = 57.8638, GNorm = 1.2432, lr_0 = 4.3220e-04
Loss = 4.2847e-01, PNorm = 57.8689, GNorm = 1.6823, lr_0 = 4.3190e-04
Loss = 3.5977e-01, PNorm = 57.8761, GNorm = 1.1670, lr_0 = 4.3161e-04
Loss = 4.1635e-01, PNorm = 57.8801, GNorm = 1.4499, lr_0 = 4.3131e-04
Loss = 4.1061e-01, PNorm = 57.8910, GNorm = 1.2131, lr_0 = 4.3102e-04
Loss = 3.8100e-01, PNorm = 57.8974, GNorm = 0.8407, lr_0 = 4.3072e-04
Loss = 4.2303e-01, PNorm = 57.9011, GNorm = 2.0182, lr_0 = 4.3043e-04
Loss = 3.9611e-01, PNorm = 57.9100, GNorm = 1.5962, lr_0 = 4.3013e-04
Loss = 3.9986e-01, PNorm = 57.9125, GNorm = 2.2277, lr_0 = 4.2984e-04
Loss = 4.0907e-01, PNorm = 57.9208, GNorm = 1.2895, lr_0 = 4.2954e-04
Loss = 4.4165e-01, PNorm = 57.9309, GNorm = 1.1492, lr_0 = 4.2925e-04
Loss = 3.7652e-01, PNorm = 57.9395, GNorm = 1.1936, lr_0 = 4.2895e-04
Loss = 3.7235e-01, PNorm = 57.9475, GNorm = 1.3984, lr_0 = 4.2866e-04
Loss = 4.0470e-01, PNorm = 57.9584, GNorm = 2.2485, lr_0 = 4.2837e-04
Loss = 3.8169e-01, PNorm = 57.9654, GNorm = 2.2797, lr_0 = 4.2807e-04
Loss = 4.3906e-01, PNorm = 57.9710, GNorm = 1.3059, lr_0 = 4.2778e-04
Loss = 4.2044e-01, PNorm = 57.9760, GNorm = 1.7620, lr_0 = 4.2749e-04
Loss = 3.8376e-01, PNorm = 57.9833, GNorm = 1.2030, lr_0 = 4.2719e-04
Loss = 3.8920e-01, PNorm = 57.9901, GNorm = 1.2090, lr_0 = 4.2690e-04
Loss = 3.9870e-01, PNorm = 58.0001, GNorm = 1.0403, lr_0 = 4.2661e-04
Loss = 4.4473e-01, PNorm = 58.0092, GNorm = 2.7201, lr_0 = 4.2632e-04
Loss = 4.2267e-01, PNorm = 58.0139, GNorm = 2.4128, lr_0 = 4.2602e-04
Loss = 4.2557e-01, PNorm = 58.0293, GNorm = 0.9746, lr_0 = 4.2573e-04
Loss = 3.5089e-01, PNorm = 58.0390, GNorm = 1.5657, lr_0 = 4.2544e-04
Loss = 4.9860e-01, PNorm = 58.0451, GNorm = 1.2366, lr_0 = 4.2515e-04
Loss = 3.8530e-01, PNorm = 58.0530, GNorm = 1.0152, lr_0 = 4.2486e-04
Loss = 3.9205e-01, PNorm = 58.0591, GNorm = 1.0760, lr_0 = 4.2457e-04
Loss = 4.0387e-01, PNorm = 58.0699, GNorm = 0.9174, lr_0 = 4.2428e-04
Loss = 4.4419e-01, PNorm = 58.0813, GNorm = 1.3862, lr_0 = 4.2399e-04
Loss = 4.4716e-01, PNorm = 58.0888, GNorm = 1.2025, lr_0 = 4.2370e-04
Loss = 4.2993e-01, PNorm = 58.0953, GNorm = 1.3424, lr_0 = 4.2340e-04
Loss = 3.7907e-01, PNorm = 58.0992, GNorm = 1.2835, lr_0 = 4.2311e-04
Loss = 3.0345e-01, PNorm = 58.1098, GNorm = 1.4923, lr_0 = 4.2283e-04
Loss = 4.4378e-01, PNorm = 58.1171, GNorm = 1.5645, lr_0 = 4.2254e-04
Loss = 4.3081e-01, PNorm = 58.1243, GNorm = 2.0362, lr_0 = 4.2225e-04
Loss = 3.7000e-01, PNorm = 58.1239, GNorm = 1.4417, lr_0 = 4.2196e-04
Loss = 3.5864e-01, PNorm = 58.1275, GNorm = 1.0753, lr_0 = 4.2167e-04
Loss = 4.4248e-01, PNorm = 58.1360, GNorm = 1.2936, lr_0 = 4.2138e-04
Loss = 4.0903e-01, PNorm = 58.1386, GNorm = 1.8826, lr_0 = 4.2109e-04
Loss = 4.2020e-01, PNorm = 58.1472, GNorm = 1.2001, lr_0 = 4.2080e-04
Loss = 3.6193e-01, PNorm = 58.1550, GNorm = 1.3692, lr_0 = 4.2051e-04
Loss = 3.6302e-01, PNorm = 58.1652, GNorm = 1.4474, lr_0 = 4.2023e-04
Loss = 3.6278e-01, PNorm = 58.1720, GNorm = 1.9001, lr_0 = 4.1994e-04
Loss = 3.7875e-01, PNorm = 58.1732, GNorm = 1.5233, lr_0 = 4.1965e-04
Loss = 4.1672e-01, PNorm = 58.1779, GNorm = 1.7250, lr_0 = 4.1936e-04
Loss = 4.0238e-01, PNorm = 58.1829, GNorm = 1.2369, lr_0 = 4.1907e-04
Loss = 3.6396e-01, PNorm = 58.1943, GNorm = 1.2135, lr_0 = 4.1879e-04
Loss = 3.4630e-01, PNorm = 58.1961, GNorm = 1.1872, lr_0 = 4.1850e-04
Loss = 4.0594e-01, PNorm = 58.2054, GNorm = 2.1560, lr_0 = 4.1821e-04
Loss = 4.6330e-01, PNorm = 58.2115, GNorm = 1.7841, lr_0 = 4.1793e-04
Loss = 4.0110e-01, PNorm = 58.2220, GNorm = 1.0935, lr_0 = 4.1764e-04
Loss = 3.9505e-01, PNorm = 58.2215, GNorm = 1.4577, lr_0 = 4.1736e-04
Loss = 4.1655e-01, PNorm = 58.2282, GNorm = 1.4073, lr_0 = 4.1707e-04
Loss = 4.3101e-01, PNorm = 58.2391, GNorm = 1.8686, lr_0 = 4.1678e-04
Loss = 3.7067e-01, PNorm = 58.2503, GNorm = 1.4871, lr_0 = 4.1650e-04
Loss = 3.9482e-01, PNorm = 58.2572, GNorm = 1.7003, lr_0 = 4.1621e-04
Loss = 4.2404e-01, PNorm = 58.2628, GNorm = 1.9585, lr_0 = 4.1593e-04
Loss = 4.6264e-01, PNorm = 58.2690, GNorm = 1.9309, lr_0 = 4.1564e-04
Loss = 3.8880e-01, PNorm = 58.2721, GNorm = 1.3058, lr_0 = 4.1536e-04
Loss = 3.8031e-01, PNorm = 58.2791, GNorm = 1.1043, lr_0 = 4.1507e-04
Loss = 3.8827e-01, PNorm = 58.2835, GNorm = 1.2816, lr_0 = 4.1479e-04
Loss = 3.3264e-01, PNorm = 58.2884, GNorm = 1.1783, lr_0 = 4.1450e-04
Loss = 3.7998e-01, PNorm = 58.2946, GNorm = 1.2393, lr_0 = 4.1422e-04
Loss = 4.1566e-01, PNorm = 58.3031, GNorm = 1.5570, lr_0 = 4.1394e-04
Loss = 4.1627e-01, PNorm = 58.3121, GNorm = 1.2189, lr_0 = 4.1365e-04
Loss = 4.6041e-01, PNorm = 58.3121, GNorm = 1.9409, lr_0 = 4.1337e-04
Loss = 4.1569e-01, PNorm = 58.3186, GNorm = 1.1671, lr_0 = 4.1309e-04
Loss = 4.3667e-01, PNorm = 58.3159, GNorm = 1.9751, lr_0 = 4.1280e-04
Loss = 4.6170e-01, PNorm = 58.3339, GNorm = 2.0921, lr_0 = 4.1252e-04
Loss = 4.3567e-01, PNorm = 58.3415, GNorm = 1.3227, lr_0 = 4.1224e-04
Loss = 3.6421e-01, PNorm = 58.3475, GNorm = 1.1170, lr_0 = 4.1196e-04
Loss = 3.5306e-01, PNorm = 58.3535, GNorm = 1.4546, lr_0 = 4.1167e-04
Loss = 4.5150e-01, PNorm = 58.3589, GNorm = 2.5110, lr_0 = 4.1139e-04
Loss = 4.2550e-01, PNorm = 58.3653, GNorm = 1.3836, lr_0 = 4.1111e-04
Loss = 4.3363e-01, PNorm = 58.3689, GNorm = 2.1967, lr_0 = 4.1083e-04
Loss = 3.8309e-01, PNorm = 58.3760, GNorm = 1.3806, lr_0 = 4.1055e-04
Loss = 3.8424e-01, PNorm = 58.3774, GNorm = 1.6898, lr_0 = 4.1027e-04
Loss = 3.9409e-01, PNorm = 58.3877, GNorm = 1.1679, lr_0 = 4.0998e-04
Loss = 4.1334e-01, PNorm = 58.3939, GNorm = 1.0829, lr_0 = 4.0970e-04
Loss = 4.1025e-01, PNorm = 58.4011, GNorm = 1.0666, lr_0 = 4.0942e-04
Loss = 4.4254e-01, PNorm = 58.4042, GNorm = 1.3040, lr_0 = 4.0914e-04
Loss = 3.9921e-01, PNorm = 58.4111, GNorm = 1.2513, lr_0 = 4.0886e-04
Loss = 3.9220e-01, PNorm = 58.4122, GNorm = 1.8041, lr_0 = 4.0858e-04
Loss = 3.8960e-01, PNorm = 58.4188, GNorm = 1.0452, lr_0 = 4.0830e-04
Loss = 4.5858e-01, PNorm = 58.4227, GNorm = 1.2179, lr_0 = 4.0802e-04
Loss = 4.6731e-01, PNorm = 58.4296, GNorm = 1.6602, lr_0 = 4.0774e-04
Loss = 3.7851e-01, PNorm = 58.4351, GNorm = 1.7137, lr_0 = 4.0746e-04
Loss = 3.9952e-01, PNorm = 58.4456, GNorm = 1.6190, lr_0 = 4.0718e-04
Loss = 4.2984e-01, PNorm = 58.4503, GNorm = 1.5071, lr_0 = 4.0691e-04
Loss = 3.6342e-01, PNorm = 58.4572, GNorm = 2.1640, lr_0 = 4.0663e-04
Loss = 3.5686e-01, PNorm = 58.4627, GNorm = 1.1469, lr_0 = 4.0635e-04
Loss = 4.0891e-01, PNorm = 58.4715, GNorm = 1.2809, lr_0 = 4.0607e-04
Loss = 3.9918e-01, PNorm = 58.4746, GNorm = 1.7725, lr_0 = 4.0579e-04
Loss = 4.0192e-01, PNorm = 58.4808, GNorm = 1.6011, lr_0 = 4.0551e-04
Loss = 3.9616e-01, PNorm = 58.4833, GNorm = 1.9713, lr_0 = 4.0524e-04
Loss = 4.3079e-01, PNorm = 58.4846, GNorm = 1.9197, lr_0 = 4.0496e-04
Loss = 4.4457e-01, PNorm = 58.4953, GNorm = 1.3765, lr_0 = 4.0468e-04
Validation mae = 0.113922
Epoch 13
Loss = 4.2067e-01, PNorm = 58.5044, GNorm = 1.7066, lr_0 = 4.0440e-04
Loss = 4.4517e-01, PNorm = 58.5148, GNorm = 1.3294, lr_0 = 4.0413e-04
Loss = 3.8044e-01, PNorm = 58.5233, GNorm = 1.6617, lr_0 = 4.0385e-04
Loss = 4.1286e-01, PNorm = 58.5338, GNorm = 1.5607, lr_0 = 4.0357e-04
Loss = 3.7106e-01, PNorm = 58.5433, GNorm = 1.4607, lr_0 = 4.0330e-04
Loss = 3.5708e-01, PNorm = 58.5494, GNorm = 1.4493, lr_0 = 4.0302e-04
Loss = 4.2439e-01, PNorm = 58.5615, GNorm = 1.4469, lr_0 = 4.0274e-04
Loss = 3.7353e-01, PNorm = 58.5645, GNorm = 1.2244, lr_0 = 4.0247e-04
Loss = 4.2633e-01, PNorm = 58.5687, GNorm = 1.4715, lr_0 = 4.0219e-04
Loss = 3.8591e-01, PNorm = 58.5672, GNorm = 1.2583, lr_0 = 4.0192e-04
Loss = 4.1202e-01, PNorm = 58.5740, GNorm = 1.4481, lr_0 = 4.0164e-04
Loss = 3.9162e-01, PNorm = 58.5775, GNorm = 1.5053, lr_0 = 4.0137e-04
Loss = 3.6840e-01, PNorm = 58.5864, GNorm = 1.5497, lr_0 = 4.0109e-04
Loss = 4.1862e-01, PNorm = 58.5967, GNorm = 1.2991, lr_0 = 4.0082e-04
Loss = 3.4828e-01, PNorm = 58.6011, GNorm = 1.5058, lr_0 = 4.0054e-04
Loss = 3.1757e-01, PNorm = 58.6078, GNorm = 1.3708, lr_0 = 4.0027e-04
Loss = 3.7065e-01, PNorm = 58.6125, GNorm = 1.1145, lr_0 = 3.9999e-04
Loss = 3.4954e-01, PNorm = 58.6177, GNorm = 1.3113, lr_0 = 3.9972e-04
Loss = 3.7630e-01, PNorm = 58.6261, GNorm = 1.7326, lr_0 = 3.9945e-04
Loss = 3.9894e-01, PNorm = 58.6308, GNorm = 1.0747, lr_0 = 3.9917e-04
Loss = 4.3409e-01, PNorm = 58.6358, GNorm = 1.1510, lr_0 = 3.9890e-04
Loss = 3.4478e-01, PNorm = 58.6472, GNorm = 1.4881, lr_0 = 3.9863e-04
Loss = 5.2789e-01, PNorm = 58.6575, GNorm = 1.1734, lr_0 = 3.9835e-04
Loss = 3.5739e-01, PNorm = 58.6651, GNorm = 1.2203, lr_0 = 3.9808e-04
Loss = 4.3989e-01, PNorm = 58.6731, GNorm = 1.3320, lr_0 = 3.9781e-04
Loss = 4.4759e-01, PNorm = 58.6770, GNorm = 1.0705, lr_0 = 3.9753e-04
Loss = 4.2438e-01, PNorm = 58.6850, GNorm = 1.4460, lr_0 = 3.9726e-04
Loss = 3.9495e-01, PNorm = 58.6906, GNorm = 1.1650, lr_0 = 3.9699e-04
Loss = 3.9878e-01, PNorm = 58.6934, GNorm = 1.2236, lr_0 = 3.9672e-04
Loss = 3.8468e-01, PNorm = 58.6987, GNorm = 1.4376, lr_0 = 3.9645e-04
Loss = 3.4488e-01, PNorm = 58.7019, GNorm = 1.3945, lr_0 = 3.9617e-04
Loss = 3.7792e-01, PNorm = 58.7029, GNorm = 1.4479, lr_0 = 3.9590e-04
Loss = 3.7801e-01, PNorm = 58.7104, GNorm = 2.0081, lr_0 = 3.9563e-04
Loss = 4.8093e-01, PNorm = 58.7156, GNorm = 1.4533, lr_0 = 3.9536e-04
Loss = 3.9752e-01, PNorm = 58.7271, GNorm = 1.2949, lr_0 = 3.9509e-04
Loss = 3.5655e-01, PNorm = 58.7351, GNorm = 1.3714, lr_0 = 3.9482e-04
Loss = 4.1362e-01, PNorm = 58.7443, GNorm = 2.0654, lr_0 = 3.9455e-04
Loss = 3.9054e-01, PNorm = 58.7476, GNorm = 1.2260, lr_0 = 3.9428e-04
Loss = 3.7676e-01, PNorm = 58.7538, GNorm = 1.6839, lr_0 = 3.9401e-04
Loss = 3.8349e-01, PNorm = 58.7565, GNorm = 1.7634, lr_0 = 3.9374e-04
Loss = 3.9335e-01, PNorm = 58.7654, GNorm = 1.4583, lr_0 = 3.9347e-04
Loss = 3.5443e-01, PNorm = 58.7760, GNorm = 1.4488, lr_0 = 3.9320e-04
Loss = 3.5017e-01, PNorm = 58.7847, GNorm = 1.2805, lr_0 = 3.9293e-04
Loss = 3.7020e-01, PNorm = 58.7915, GNorm = 1.3711, lr_0 = 3.9266e-04
Loss = 3.8364e-01, PNorm = 58.7979, GNorm = 1.5707, lr_0 = 3.9239e-04
Loss = 3.6387e-01, PNorm = 58.8026, GNorm = 1.3813, lr_0 = 3.9212e-04
Loss = 3.6842e-01, PNorm = 58.8051, GNorm = 1.3937, lr_0 = 3.9185e-04
Loss = 4.5392e-01, PNorm = 58.8070, GNorm = 0.9699, lr_0 = 3.9159e-04
Loss = 4.1975e-01, PNorm = 58.8183, GNorm = 1.6253, lr_0 = 3.9132e-04
Loss = 4.0913e-01, PNorm = 58.8275, GNorm = 1.4850, lr_0 = 3.9105e-04
Loss = 4.0849e-01, PNorm = 58.8317, GNorm = 1.4263, lr_0 = 3.9078e-04
Loss = 3.6814e-01, PNorm = 58.8339, GNorm = 1.4148, lr_0 = 3.9051e-04
Loss = 4.2540e-01, PNorm = 58.8390, GNorm = 2.4114, lr_0 = 3.9025e-04
Loss = 3.9463e-01, PNorm = 58.8417, GNorm = 1.6140, lr_0 = 3.8998e-04
Loss = 4.1717e-01, PNorm = 58.8488, GNorm = 2.4318, lr_0 = 3.8971e-04
Loss = 4.0893e-01, PNorm = 58.8545, GNorm = 1.4062, lr_0 = 3.8945e-04
Loss = 3.8191e-01, PNorm = 58.8685, GNorm = 1.9312, lr_0 = 3.8918e-04
Loss = 3.9724e-01, PNorm = 58.8734, GNorm = 1.4237, lr_0 = 3.8891e-04
Loss = 3.6117e-01, PNorm = 58.8826, GNorm = 1.2474, lr_0 = 3.8865e-04
Loss = 3.7506e-01, PNorm = 58.8872, GNorm = 1.3576, lr_0 = 3.8838e-04
Loss = 3.8818e-01, PNorm = 58.8938, GNorm = 1.4670, lr_0 = 3.8811e-04
Loss = 3.4114e-01, PNorm = 58.8974, GNorm = 1.0741, lr_0 = 3.8785e-04
Loss = 4.1072e-01, PNorm = 58.9048, GNorm = 1.8847, lr_0 = 3.8758e-04
Loss = 4.2017e-01, PNorm = 58.9065, GNorm = 2.1788, lr_0 = 3.8732e-04
Loss = 4.7189e-01, PNorm = 58.9136, GNorm = 1.4816, lr_0 = 3.8705e-04
Loss = 4.4615e-01, PNorm = 58.9212, GNorm = 1.4139, lr_0 = 3.8679e-04
Loss = 4.0079e-01, PNorm = 58.9237, GNorm = 1.7174, lr_0 = 3.8652e-04
Loss = 3.5659e-01, PNorm = 58.9287, GNorm = 1.4202, lr_0 = 3.8626e-04
Loss = 3.4329e-01, PNorm = 58.9337, GNorm = 1.5895, lr_0 = 3.8599e-04
Loss = 4.1158e-01, PNorm = 58.9412, GNorm = 1.4392, lr_0 = 3.8573e-04
Loss = 4.9275e-01, PNorm = 58.9449, GNorm = 1.7086, lr_0 = 3.8546e-04
Loss = 3.8309e-01, PNorm = 58.9523, GNorm = 2.1609, lr_0 = 3.8520e-04
Loss = 4.0988e-01, PNorm = 58.9557, GNorm = 1.2136, lr_0 = 3.8493e-04
Loss = 4.0986e-01, PNorm = 58.9552, GNorm = 1.6776, lr_0 = 3.8467e-04
Loss = 4.2732e-01, PNorm = 58.9611, GNorm = 1.8916, lr_0 = 3.8441e-04
Loss = 3.9234e-01, PNorm = 58.9666, GNorm = 1.6531, lr_0 = 3.8414e-04
Loss = 4.0621e-01, PNorm = 58.9713, GNorm = 1.1205, lr_0 = 3.8388e-04
Loss = 4.0787e-01, PNorm = 58.9763, GNorm = 2.0121, lr_0 = 3.8362e-04
Loss = 3.8285e-01, PNorm = 58.9801, GNorm = 1.2170, lr_0 = 3.8336e-04
Loss = 3.9892e-01, PNorm = 58.9893, GNorm = 1.7832, lr_0 = 3.8309e-04
Loss = 3.5673e-01, PNorm = 58.9951, GNorm = 1.5392, lr_0 = 3.8283e-04
Loss = 3.9123e-01, PNorm = 59.0012, GNorm = 1.4750, lr_0 = 3.8257e-04
Loss = 3.5448e-01, PNorm = 59.0093, GNorm = 1.2846, lr_0 = 3.8231e-04
Loss = 4.0962e-01, PNorm = 59.0133, GNorm = 1.3012, lr_0 = 3.8204e-04
Loss = 3.9044e-01, PNorm = 59.0186, GNorm = 1.3605, lr_0 = 3.8178e-04
Loss = 3.9008e-01, PNorm = 59.0249, GNorm = 1.3858, lr_0 = 3.8152e-04
Loss = 3.7658e-01, PNorm = 59.0305, GNorm = 1.0548, lr_0 = 3.8126e-04
Loss = 4.8379e-01, PNorm = 59.0350, GNorm = 1.4566, lr_0 = 3.8100e-04
Loss = 3.6179e-01, PNorm = 59.0439, GNorm = 1.0853, lr_0 = 3.8074e-04
Loss = 4.3980e-01, PNorm = 59.0508, GNorm = 1.4598, lr_0 = 3.8048e-04
Loss = 3.8612e-01, PNorm = 59.0599, GNorm = 1.4297, lr_0 = 3.8022e-04
Loss = 3.9137e-01, PNorm = 59.0617, GNorm = 1.1477, lr_0 = 3.7995e-04
Loss = 3.7150e-01, PNorm = 59.0612, GNorm = 0.9314, lr_0 = 3.7969e-04
Loss = 3.3561e-01, PNorm = 59.0671, GNorm = 1.6512, lr_0 = 3.7943e-04
Loss = 4.0302e-01, PNorm = 59.0741, GNorm = 1.4779, lr_0 = 3.7917e-04
Loss = 4.2420e-01, PNorm = 59.0819, GNorm = 1.7375, lr_0 = 3.7891e-04
Loss = 3.8883e-01, PNorm = 59.0906, GNorm = 1.0243, lr_0 = 3.7866e-04
Loss = 3.4134e-01, PNorm = 59.0938, GNorm = 1.0538, lr_0 = 3.7840e-04
Loss = 3.6909e-01, PNorm = 59.0979, GNorm = 1.4367, lr_0 = 3.7814e-04
Loss = 4.2082e-01, PNorm = 59.0987, GNorm = 1.5993, lr_0 = 3.7788e-04
Loss = 4.1290e-01, PNorm = 59.1017, GNorm = 1.3092, lr_0 = 3.7762e-04
Loss = 3.8792e-01, PNorm = 59.1116, GNorm = 2.2817, lr_0 = 3.7736e-04
Loss = 4.5532e-01, PNorm = 59.1187, GNorm = 1.2693, lr_0 = 3.7710e-04
Loss = 3.7438e-01, PNorm = 59.1301, GNorm = 1.7103, lr_0 = 3.7684e-04
Loss = 4.1168e-01, PNorm = 59.1280, GNorm = 1.5634, lr_0 = 3.7659e-04
Loss = 4.6405e-01, PNorm = 59.1360, GNorm = 1.5730, lr_0 = 3.7633e-04
Loss = 4.2865e-01, PNorm = 59.1360, GNorm = 2.0535, lr_0 = 3.7607e-04
Loss = 4.0307e-01, PNorm = 59.1434, GNorm = 1.4765, lr_0 = 3.7581e-04
Loss = 3.8112e-01, PNorm = 59.1441, GNorm = 1.2131, lr_0 = 3.7555e-04
Loss = 4.3494e-01, PNorm = 59.1556, GNorm = 1.2887, lr_0 = 3.7530e-04
Loss = 4.3783e-01, PNorm = 59.1648, GNorm = 1.8205, lr_0 = 3.7504e-04
Loss = 3.7541e-01, PNorm = 59.1741, GNorm = 1.2497, lr_0 = 3.7478e-04
Loss = 3.7145e-01, PNorm = 59.1807, GNorm = 1.4317, lr_0 = 3.7453e-04
Loss = 3.9743e-01, PNorm = 59.1913, GNorm = 1.2860, lr_0 = 3.7427e-04
Loss = 3.8568e-01, PNorm = 59.1990, GNorm = 1.6126, lr_0 = 3.7401e-04
Loss = 4.1481e-01, PNorm = 59.2031, GNorm = 1.3215, lr_0 = 3.7376e-04
Loss = 3.8463e-01, PNorm = 59.2135, GNorm = 1.8266, lr_0 = 3.7350e-04
Loss = 4.2518e-01, PNorm = 59.2161, GNorm = 1.3040, lr_0 = 3.7325e-04
Loss = 4.0701e-01, PNorm = 59.2230, GNorm = 1.6617, lr_0 = 3.7299e-04
Loss = 3.3393e-01, PNorm = 59.2211, GNorm = 1.2474, lr_0 = 3.7273e-04
Validation mae = 0.114106
Epoch 14
Loss = 3.3978e-01, PNorm = 59.2259, GNorm = 1.0558, lr_0 = 3.7248e-04
Loss = 4.3900e-01, PNorm = 59.2300, GNorm = 1.0096, lr_0 = 3.7222e-04
Loss = 3.4946e-01, PNorm = 59.2318, GNorm = 1.5030, lr_0 = 3.7197e-04
Loss = 3.6180e-01, PNorm = 59.2340, GNorm = 1.3395, lr_0 = 3.7171e-04
Loss = 3.8584e-01, PNorm = 59.2420, GNorm = 2.2084, lr_0 = 3.7146e-04
Loss = 3.6468e-01, PNorm = 59.2434, GNorm = 2.3241, lr_0 = 3.7120e-04
Loss = 3.9966e-01, PNorm = 59.2485, GNorm = 1.2385, lr_0 = 3.7095e-04
Loss = 3.9225e-01, PNorm = 59.2562, GNorm = 1.2256, lr_0 = 3.7070e-04
Loss = 4.2774e-01, PNorm = 59.2634, GNorm = 1.6686, lr_0 = 3.7044e-04
Loss = 3.7746e-01, PNorm = 59.2699, GNorm = 1.3650, lr_0 = 3.7019e-04
Loss = 3.1392e-01, PNorm = 59.2757, GNorm = 1.1807, lr_0 = 3.6993e-04
Loss = 3.5183e-01, PNorm = 59.2797, GNorm = 1.3542, lr_0 = 3.6968e-04
Loss = 3.9458e-01, PNorm = 59.2833, GNorm = 1.1306, lr_0 = 3.6943e-04
Loss = 4.0088e-01, PNorm = 59.2865, GNorm = 1.6690, lr_0 = 3.6917e-04
Loss = 3.5331e-01, PNorm = 59.2928, GNorm = 1.2972, lr_0 = 3.6892e-04
Loss = 3.4963e-01, PNorm = 59.3021, GNorm = 1.6569, lr_0 = 3.6867e-04
Loss = 3.6457e-01, PNorm = 59.3023, GNorm = 1.6385, lr_0 = 3.6842e-04
Loss = 3.3798e-01, PNorm = 59.3113, GNorm = 1.1955, lr_0 = 3.6816e-04
Loss = 3.8820e-01, PNorm = 59.3114, GNorm = 1.4475, lr_0 = 3.6791e-04
Loss = 3.7317e-01, PNorm = 59.3180, GNorm = 1.6516, lr_0 = 3.6766e-04
Loss = 3.5705e-01, PNorm = 59.3247, GNorm = 1.3323, lr_0 = 3.6741e-04
Loss = 4.4192e-01, PNorm = 59.3268, GNorm = 2.1301, lr_0 = 3.6716e-04
Loss = 3.7158e-01, PNorm = 59.3343, GNorm = 1.1661, lr_0 = 3.6690e-04
Loss = 4.0946e-01, PNorm = 59.3377, GNorm = 1.0622, lr_0 = 3.6665e-04
Loss = 4.6777e-01, PNorm = 59.3418, GNorm = 1.3917, lr_0 = 3.6640e-04
Loss = 3.7655e-01, PNorm = 59.3462, GNorm = 2.4484, lr_0 = 3.6615e-04
Loss = 3.8060e-01, PNorm = 59.3498, GNorm = 1.1945, lr_0 = 3.6590e-04
Loss = 3.7272e-01, PNorm = 59.3576, GNorm = 1.7018, lr_0 = 3.6565e-04
Loss = 4.0555e-01, PNorm = 59.3604, GNorm = 1.3128, lr_0 = 3.6540e-04
Loss = 4.2621e-01, PNorm = 59.3673, GNorm = 1.7094, lr_0 = 3.6515e-04
Loss = 4.4310e-01, PNorm = 59.3713, GNorm = 1.4199, lr_0 = 3.6490e-04
Loss = 3.9069e-01, PNorm = 59.3770, GNorm = 1.6610, lr_0 = 3.6465e-04
Loss = 3.1500e-01, PNorm = 59.3824, GNorm = 1.2255, lr_0 = 3.6440e-04
Loss = 4.1994e-01, PNorm = 59.3881, GNorm = 1.4470, lr_0 = 3.6415e-04
Loss = 4.3458e-01, PNorm = 59.3970, GNorm = 1.4554, lr_0 = 3.6390e-04
Loss = 4.3895e-01, PNorm = 59.4025, GNorm = 1.4755, lr_0 = 3.6365e-04
Loss = 4.1604e-01, PNorm = 59.4083, GNorm = 1.5350, lr_0 = 3.6340e-04
Loss = 3.7178e-01, PNorm = 59.4207, GNorm = 2.8151, lr_0 = 3.6315e-04
Loss = 3.8865e-01, PNorm = 59.4270, GNorm = 1.0516, lr_0 = 3.6290e-04
Loss = 3.9299e-01, PNorm = 59.4353, GNorm = 2.3357, lr_0 = 3.6266e-04
Loss = 3.8039e-01, PNorm = 59.4391, GNorm = 1.0875, lr_0 = 3.6241e-04
Loss = 3.7427e-01, PNorm = 59.4417, GNorm = 1.5665, lr_0 = 3.6216e-04
Loss = 4.0565e-01, PNorm = 59.4399, GNorm = 2.0362, lr_0 = 3.6191e-04
Loss = 4.3238e-01, PNorm = 59.4439, GNorm = 1.4844, lr_0 = 3.6166e-04
Loss = 3.9548e-01, PNorm = 59.4509, GNorm = 1.0541, lr_0 = 3.6141e-04
Loss = 4.1393e-01, PNorm = 59.4601, GNorm = 1.3363, lr_0 = 3.6117e-04
Loss = 3.9958e-01, PNorm = 59.4634, GNorm = 1.2170, lr_0 = 3.6092e-04
Loss = 3.6853e-01, PNorm = 59.4691, GNorm = 1.6866, lr_0 = 3.6067e-04
Loss = 3.8707e-01, PNorm = 59.4734, GNorm = 1.3390, lr_0 = 3.6043e-04
Loss = 3.8764e-01, PNorm = 59.4814, GNorm = 2.0870, lr_0 = 3.6018e-04
Loss = 3.9250e-01, PNorm = 59.4905, GNorm = 1.2323, lr_0 = 3.5993e-04
Loss = 4.0785e-01, PNorm = 59.4998, GNorm = 1.3478, lr_0 = 3.5969e-04
Loss = 3.9762e-01, PNorm = 59.5078, GNorm = 1.1224, lr_0 = 3.5944e-04
Loss = 4.0389e-01, PNorm = 59.5176, GNorm = 0.9424, lr_0 = 3.5919e-04
Loss = 4.0735e-01, PNorm = 59.5224, GNorm = 2.7008, lr_0 = 3.5895e-04
Loss = 3.6886e-01, PNorm = 59.5287, GNorm = 1.3857, lr_0 = 3.5870e-04
Loss = 3.8334e-01, PNorm = 59.5330, GNorm = 1.0593, lr_0 = 3.5845e-04
Loss = 3.6493e-01, PNorm = 59.5392, GNorm = 1.3246, lr_0 = 3.5821e-04
Loss = 3.8915e-01, PNorm = 59.5420, GNorm = 0.9006, lr_0 = 3.5796e-04
Loss = 3.9674e-01, PNorm = 59.5474, GNorm = 1.4413, lr_0 = 3.5772e-04
Loss = 3.8201e-01, PNorm = 59.5543, GNorm = 1.5457, lr_0 = 3.5747e-04
Loss = 4.1002e-01, PNorm = 59.5578, GNorm = 1.5585, lr_0 = 3.5723e-04
Loss = 3.4535e-01, PNorm = 59.5625, GNorm = 1.3845, lr_0 = 3.5698e-04
Loss = 3.5543e-01, PNorm = 59.5671, GNorm = 1.5694, lr_0 = 3.5674e-04
Loss = 4.0475e-01, PNorm = 59.5678, GNorm = 1.5683, lr_0 = 3.5650e-04
Loss = 3.5373e-01, PNorm = 59.5756, GNorm = 0.9967, lr_0 = 3.5625e-04
Loss = 3.8963e-01, PNorm = 59.5821, GNorm = 1.2816, lr_0 = 3.5601e-04
Loss = 3.9012e-01, PNorm = 59.5859, GNorm = 1.4573, lr_0 = 3.5576e-04
Loss = 3.7960e-01, PNorm = 59.5930, GNorm = 1.4328, lr_0 = 3.5552e-04
Loss = 4.3861e-01, PNorm = 59.5989, GNorm = 1.6409, lr_0 = 3.5528e-04
Loss = 3.9280e-01, PNorm = 59.6018, GNorm = 1.9625, lr_0 = 3.5503e-04
Loss = 3.9310e-01, PNorm = 59.6056, GNorm = 1.5355, lr_0 = 3.5479e-04
Loss = 3.8047e-01, PNorm = 59.6110, GNorm = 2.0043, lr_0 = 3.5455e-04
Loss = 3.7915e-01, PNorm = 59.6130, GNorm = 1.3437, lr_0 = 3.5430e-04
Loss = 3.9611e-01, PNorm = 59.6201, GNorm = 1.8139, lr_0 = 3.5406e-04
Loss = 3.5654e-01, PNorm = 59.6271, GNorm = 1.4158, lr_0 = 3.5382e-04
Loss = 3.7225e-01, PNorm = 59.6298, GNorm = 1.6681, lr_0 = 3.5358e-04
Loss = 3.9831e-01, PNorm = 59.6379, GNorm = 1.1483, lr_0 = 3.5333e-04
Loss = 3.4202e-01, PNorm = 59.6412, GNorm = 1.4702, lr_0 = 3.5309e-04
Loss = 4.0212e-01, PNorm = 59.6459, GNorm = 1.5114, lr_0 = 3.5285e-04
Loss = 4.2256e-01, PNorm = 59.6492, GNorm = 1.3389, lr_0 = 3.5261e-04
Loss = 4.7149e-01, PNorm = 59.6529, GNorm = 1.8886, lr_0 = 3.5237e-04
Loss = 3.8973e-01, PNorm = 59.6564, GNorm = 1.2083, lr_0 = 3.5212e-04
Loss = 4.0241e-01, PNorm = 59.6632, GNorm = 1.1187, lr_0 = 3.5188e-04
Loss = 4.0572e-01, PNorm = 59.6705, GNorm = 1.2352, lr_0 = 3.5164e-04
Loss = 3.5468e-01, PNorm = 59.6730, GNorm = 1.3891, lr_0 = 3.5140e-04
Loss = 3.6219e-01, PNorm = 59.6790, GNorm = 1.4475, lr_0 = 3.5116e-04
Loss = 3.9963e-01, PNorm = 59.6836, GNorm = 1.2723, lr_0 = 3.5092e-04
Loss = 4.3615e-01, PNorm = 59.6873, GNorm = 1.2372, lr_0 = 3.5068e-04
Loss = 4.2221e-01, PNorm = 59.6909, GNorm = 2.2630, lr_0 = 3.5044e-04
Loss = 4.0902e-01, PNorm = 59.6996, GNorm = 2.3772, lr_0 = 3.5020e-04
Loss = 4.2981e-01, PNorm = 59.7043, GNorm = 1.6173, lr_0 = 3.4996e-04
Loss = 4.0376e-01, PNorm = 59.7075, GNorm = 1.2377, lr_0 = 3.4972e-04
Loss = 4.1287e-01, PNorm = 59.7110, GNorm = 1.8478, lr_0 = 3.4948e-04
Loss = 3.6246e-01, PNorm = 59.7183, GNorm = 1.5093, lr_0 = 3.4924e-04
Loss = 3.8783e-01, PNorm = 59.7216, GNorm = 0.9867, lr_0 = 3.4900e-04
Loss = 4.0022e-01, PNorm = 59.7279, GNorm = 1.6643, lr_0 = 3.4876e-04
Loss = 3.4603e-01, PNorm = 59.7363, GNorm = 1.3306, lr_0 = 3.4852e-04
Loss = 3.6706e-01, PNorm = 59.7422, GNorm = 1.6150, lr_0 = 3.4828e-04
Loss = 3.8214e-01, PNorm = 59.7465, GNorm = 1.4567, lr_0 = 3.4805e-04
Loss = 3.5750e-01, PNorm = 59.7529, GNorm = 2.1569, lr_0 = 3.4781e-04
Loss = 3.9546e-01, PNorm = 59.7568, GNorm = 0.9343, lr_0 = 3.4757e-04
Loss = 3.8560e-01, PNorm = 59.7627, GNorm = 1.8655, lr_0 = 3.4733e-04
Loss = 4.0605e-01, PNorm = 59.7693, GNorm = 1.6020, lr_0 = 3.4709e-04
Loss = 3.7890e-01, PNorm = 59.7754, GNorm = 1.6816, lr_0 = 3.4686e-04
Loss = 3.9643e-01, PNorm = 59.7823, GNorm = 1.8596, lr_0 = 3.4662e-04
Loss = 3.8715e-01, PNorm = 59.7858, GNorm = 1.0668, lr_0 = 3.4638e-04
Loss = 4.1436e-01, PNorm = 59.7861, GNorm = 1.3546, lr_0 = 3.4614e-04
Loss = 3.8006e-01, PNorm = 59.7910, GNorm = 1.1960, lr_0 = 3.4591e-04
Loss = 3.7135e-01, PNorm = 59.7973, GNorm = 1.2670, lr_0 = 3.4567e-04
Loss = 3.5139e-01, PNorm = 59.8022, GNorm = 1.3509, lr_0 = 3.4543e-04
Loss = 4.2992e-01, PNorm = 59.8060, GNorm = 1.3644, lr_0 = 3.4520e-04
Loss = 4.0681e-01, PNorm = 59.8202, GNorm = 2.2977, lr_0 = 3.4496e-04
Loss = 4.0133e-01, PNorm = 59.8213, GNorm = 1.6274, lr_0 = 3.4472e-04
Loss = 3.8964e-01, PNorm = 59.8318, GNorm = 1.2158, lr_0 = 3.4449e-04
Loss = 4.3666e-01, PNorm = 59.8352, GNorm = 1.2075, lr_0 = 3.4425e-04
Loss = 4.0592e-01, PNorm = 59.8435, GNorm = 1.4691, lr_0 = 3.4402e-04
Loss = 4.3703e-01, PNorm = 59.8479, GNorm = 1.4243, lr_0 = 3.4378e-04
Loss = 3.7536e-01, PNorm = 59.8573, GNorm = 1.2825, lr_0 = 3.4354e-04
Loss = 3.6005e-01, PNorm = 59.8632, GNorm = 1.6744, lr_0 = 3.4331e-04
Validation mae = 0.113821
Epoch 15
Loss = 4.3137e-01, PNorm = 59.8689, GNorm = 0.7820, lr_0 = 3.4307e-04
Loss = 4.0465e-01, PNorm = 59.8779, GNorm = 1.3218, lr_0 = 3.4284e-04
Loss = 3.6197e-01, PNorm = 59.8851, GNorm = 1.2429, lr_0 = 3.4260e-04
Loss = 3.7794e-01, PNorm = 59.8846, GNorm = 1.0273, lr_0 = 3.4237e-04
Loss = 4.4610e-01, PNorm = 59.8940, GNorm = 1.6393, lr_0 = 3.4213e-04
Loss = 3.5271e-01, PNorm = 59.9019, GNorm = 0.9123, lr_0 = 3.4190e-04
Loss = 3.4416e-01, PNorm = 59.9106, GNorm = 1.0569, lr_0 = 3.4167e-04
Loss = 3.4748e-01, PNorm = 59.9174, GNorm = 1.6386, lr_0 = 3.4143e-04
Loss = 3.7135e-01, PNorm = 59.9217, GNorm = 2.1061, lr_0 = 3.4120e-04
Loss = 3.4831e-01, PNorm = 59.9274, GNorm = 1.5811, lr_0 = 3.4096e-04
Loss = 3.8946e-01, PNorm = 59.9286, GNorm = 1.2238, lr_0 = 3.4073e-04
Loss = 3.8277e-01, PNorm = 59.9325, GNorm = 1.1439, lr_0 = 3.4050e-04
Loss = 3.8974e-01, PNorm = 59.9331, GNorm = 1.7818, lr_0 = 3.4026e-04
Loss = 3.8967e-01, PNorm = 59.9364, GNorm = 1.5078, lr_0 = 3.4003e-04
Loss = 4.2613e-01, PNorm = 59.9394, GNorm = 1.1836, lr_0 = 3.3980e-04
Loss = 3.9522e-01, PNorm = 59.9410, GNorm = 1.3023, lr_0 = 3.3956e-04
Loss = 3.7404e-01, PNorm = 59.9446, GNorm = 1.1776, lr_0 = 3.3933e-04
Loss = 4.0179e-01, PNorm = 59.9478, GNorm = 1.5811, lr_0 = 3.3910e-04
Loss = 4.2255e-01, PNorm = 59.9544, GNorm = 1.0370, lr_0 = 3.3887e-04
Loss = 3.8310e-01, PNorm = 59.9569, GNorm = 1.9449, lr_0 = 3.3864e-04
Loss = 3.9828e-01, PNorm = 59.9653, GNorm = 1.8503, lr_0 = 3.3840e-04
Loss = 3.8513e-01, PNorm = 59.9683, GNorm = 1.3080, lr_0 = 3.3817e-04
Loss = 4.3807e-01, PNorm = 59.9739, GNorm = 1.0712, lr_0 = 3.3794e-04
Loss = 3.6199e-01, PNorm = 59.9797, GNorm = 1.1348, lr_0 = 3.3771e-04
Loss = 3.8293e-01, PNorm = 59.9875, GNorm = 1.1361, lr_0 = 3.3748e-04
Loss = 4.2353e-01, PNorm = 59.9955, GNorm = 1.2998, lr_0 = 3.3725e-04
Loss = 3.8444e-01, PNorm = 59.9990, GNorm = 1.2761, lr_0 = 3.3701e-04
Loss = 3.9388e-01, PNorm = 60.0048, GNorm = 1.5980, lr_0 = 3.3678e-04
Loss = 3.5228e-01, PNorm = 60.0072, GNorm = 1.1380, lr_0 = 3.3655e-04
Loss = 3.3274e-01, PNorm = 60.0075, GNorm = 1.5060, lr_0 = 3.3632e-04
Loss = 4.0396e-01, PNorm = 60.0102, GNorm = 1.5388, lr_0 = 3.3609e-04
Loss = 4.1160e-01, PNorm = 60.0169, GNorm = 2.2798, lr_0 = 3.3586e-04
Loss = 3.7534e-01, PNorm = 60.0175, GNorm = 1.1722, lr_0 = 3.3563e-04
Loss = 4.0574e-01, PNorm = 60.0252, GNorm = 0.9789, lr_0 = 3.3540e-04
Loss = 3.5448e-01, PNorm = 60.0321, GNorm = 1.1087, lr_0 = 3.3517e-04
Loss = 4.1962e-01, PNorm = 60.0359, GNorm = 1.5802, lr_0 = 3.3494e-04
Loss = 3.4784e-01, PNorm = 60.0410, GNorm = 1.0097, lr_0 = 3.3471e-04
Loss = 4.2157e-01, PNorm = 60.0424, GNorm = 1.0848, lr_0 = 3.3448e-04
Loss = 4.0420e-01, PNorm = 60.0489, GNorm = 1.2022, lr_0 = 3.3425e-04
Loss = 3.4894e-01, PNorm = 60.0520, GNorm = 1.5609, lr_0 = 3.3403e-04
Loss = 3.8426e-01, PNorm = 60.0557, GNorm = 1.9661, lr_0 = 3.3380e-04
Loss = 4.1597e-01, PNorm = 60.0624, GNorm = 1.3192, lr_0 = 3.3357e-04
Loss = 3.5886e-01, PNorm = 60.0686, GNorm = 1.0505, lr_0 = 3.3334e-04
Loss = 3.8218e-01, PNorm = 60.0735, GNorm = 1.7316, lr_0 = 3.3311e-04
Loss = 3.8460e-01, PNorm = 60.0821, GNorm = 1.8083, lr_0 = 3.3288e-04
Loss = 4.0071e-01, PNorm = 60.0839, GNorm = 1.6197, lr_0 = 3.3265e-04
Loss = 3.7422e-01, PNorm = 60.0881, GNorm = 1.2137, lr_0 = 3.3243e-04
Loss = 3.7742e-01, PNorm = 60.0940, GNorm = 1.5124, lr_0 = 3.3220e-04
Loss = 4.0379e-01, PNorm = 60.0982, GNorm = 1.2488, lr_0 = 3.3197e-04
Loss = 4.2196e-01, PNorm = 60.1033, GNorm = 1.3833, lr_0 = 3.3174e-04
Loss = 3.7876e-01, PNorm = 60.1057, GNorm = 1.4639, lr_0 = 3.3152e-04
Loss = 3.2659e-01, PNorm = 60.1115, GNorm = 1.3367, lr_0 = 3.3129e-04
Loss = 4.0558e-01, PNorm = 60.1134, GNorm = 1.6551, lr_0 = 3.3106e-04
Loss = 3.7341e-01, PNorm = 60.1190, GNorm = 1.0942, lr_0 = 3.3084e-04
Loss = 3.8019e-01, PNorm = 60.1230, GNorm = 1.5269, lr_0 = 3.3061e-04
Loss = 3.8175e-01, PNorm = 60.1237, GNorm = 1.7070, lr_0 = 3.3038e-04
Loss = 2.9770e-01, PNorm = 60.1267, GNorm = 1.2804, lr_0 = 3.3016e-04
Loss = 3.6265e-01, PNorm = 60.1325, GNorm = 1.2070, lr_0 = 3.2993e-04
Loss = 4.0009e-01, PNorm = 60.1383, GNorm = 1.2420, lr_0 = 3.2970e-04
Loss = 3.8322e-01, PNorm = 60.1446, GNorm = 1.5237, lr_0 = 3.2948e-04
Loss = 4.0473e-01, PNorm = 60.1470, GNorm = 2.4259, lr_0 = 3.2925e-04
Loss = 3.7716e-01, PNorm = 60.1508, GNorm = 1.5446, lr_0 = 3.2903e-04
Loss = 3.9510e-01, PNorm = 60.1574, GNorm = 1.0641, lr_0 = 3.2880e-04
Loss = 4.1768e-01, PNorm = 60.1609, GNorm = 1.2649, lr_0 = 3.2858e-04
Loss = 3.7040e-01, PNorm = 60.1627, GNorm = 1.4058, lr_0 = 3.2835e-04
Loss = 3.7982e-01, PNorm = 60.1654, GNorm = 1.0473, lr_0 = 3.2813e-04
Loss = 3.4982e-01, PNorm = 60.1722, GNorm = 1.8729, lr_0 = 3.2790e-04
Loss = 3.9385e-01, PNorm = 60.1792, GNorm = 1.0971, lr_0 = 3.2768e-04
Loss = 3.4245e-01, PNorm = 60.1848, GNorm = 0.9942, lr_0 = 3.2745e-04
Loss = 3.6870e-01, PNorm = 60.1864, GNorm = 1.9040, lr_0 = 3.2723e-04
Loss = 3.3713e-01, PNorm = 60.1925, GNorm = 2.1522, lr_0 = 3.2700e-04
Loss = 3.8021e-01, PNorm = 60.1947, GNorm = 1.6699, lr_0 = 3.2678e-04
Loss = 3.7669e-01, PNorm = 60.1980, GNorm = 0.8880, lr_0 = 3.2656e-04
Loss = 3.6351e-01, PNorm = 60.1999, GNorm = 1.1980, lr_0 = 3.2633e-04
Loss = 3.6574e-01, PNorm = 60.2080, GNorm = 2.0934, lr_0 = 3.2611e-04
Loss = 3.6818e-01, PNorm = 60.2123, GNorm = 1.4751, lr_0 = 3.2589e-04
Loss = 3.7468e-01, PNorm = 60.2171, GNorm = 0.8980, lr_0 = 3.2566e-04
Loss = 3.6165e-01, PNorm = 60.2208, GNorm = 1.2213, lr_0 = 3.2544e-04
Loss = 4.1510e-01, PNorm = 60.2269, GNorm = 1.7810, lr_0 = 3.2522e-04
Loss = 3.6107e-01, PNorm = 60.2313, GNorm = 1.1207, lr_0 = 3.2499e-04
Loss = 3.5056e-01, PNorm = 60.2341, GNorm = 1.4927, lr_0 = 3.2477e-04
Loss = 3.7699e-01, PNorm = 60.2386, GNorm = 1.3918, lr_0 = 3.2455e-04
Loss = 3.6325e-01, PNorm = 60.2387, GNorm = 1.4265, lr_0 = 3.2433e-04
Loss = 3.6610e-01, PNorm = 60.2461, GNorm = 1.4811, lr_0 = 3.2410e-04
Loss = 3.4534e-01, PNorm = 60.2489, GNorm = 1.2381, lr_0 = 3.2388e-04
Loss = 3.6337e-01, PNorm = 60.2564, GNorm = 1.3051, lr_0 = 3.2366e-04
Loss = 4.1386e-01, PNorm = 60.2613, GNorm = 1.5033, lr_0 = 3.2344e-04
Loss = 3.8350e-01, PNorm = 60.2661, GNorm = 1.2911, lr_0 = 3.2322e-04
Loss = 4.1900e-01, PNorm = 60.2699, GNorm = 1.5853, lr_0 = 3.2300e-04
Loss = 3.9276e-01, PNorm = 60.2763, GNorm = 1.3785, lr_0 = 3.2277e-04
Loss = 3.7575e-01, PNorm = 60.2797, GNorm = 1.3428, lr_0 = 3.2255e-04
Loss = 4.0170e-01, PNorm = 60.2880, GNorm = 1.4677, lr_0 = 3.2233e-04
Loss = 3.3500e-01, PNorm = 60.2906, GNorm = 1.3952, lr_0 = 3.2211e-04
Loss = 4.1040e-01, PNorm = 60.2909, GNorm = 1.4319, lr_0 = 3.2189e-04
Loss = 4.6492e-01, PNorm = 60.2946, GNorm = 1.6788, lr_0 = 3.2167e-04
Loss = 3.7143e-01, PNorm = 60.2992, GNorm = 1.9780, lr_0 = 3.2145e-04
Loss = 3.6477e-01, PNorm = 60.3046, GNorm = 1.0389, lr_0 = 3.2123e-04
Loss = 3.8651e-01, PNorm = 60.3052, GNorm = 1.1534, lr_0 = 3.2101e-04
Loss = 4.3197e-01, PNorm = 60.3060, GNorm = 1.6755, lr_0 = 3.2079e-04
Loss = 3.8474e-01, PNorm = 60.3125, GNorm = 1.6132, lr_0 = 3.2057e-04
Loss = 3.8992e-01, PNorm = 60.3175, GNorm = 1.7279, lr_0 = 3.2035e-04
Loss = 4.1805e-01, PNorm = 60.3224, GNorm = 1.2117, lr_0 = 3.2013e-04
Loss = 3.9308e-01, PNorm = 60.3240, GNorm = 1.4584, lr_0 = 3.1991e-04
Loss = 3.6010e-01, PNorm = 60.3283, GNorm = 1.7845, lr_0 = 3.1969e-04
Loss = 3.8708e-01, PNorm = 60.3341, GNorm = 1.2359, lr_0 = 3.1947e-04
Loss = 3.5901e-01, PNorm = 60.3419, GNorm = 1.1812, lr_0 = 3.1925e-04
Loss = 4.2092e-01, PNorm = 60.3433, GNorm = 1.0064, lr_0 = 3.1904e-04
Loss = 3.6769e-01, PNorm = 60.3456, GNorm = 1.5671, lr_0 = 3.1882e-04
Loss = 4.3703e-01, PNorm = 60.3525, GNorm = 1.5786, lr_0 = 3.1860e-04
Loss = 3.6078e-01, PNorm = 60.3569, GNorm = 1.0897, lr_0 = 3.1838e-04
Loss = 4.1235e-01, PNorm = 60.3655, GNorm = 1.3640, lr_0 = 3.1816e-04
Loss = 3.8027e-01, PNorm = 60.3645, GNorm = 1.8615, lr_0 = 3.1794e-04
Loss = 3.3801e-01, PNorm = 60.3688, GNorm = 1.0160, lr_0 = 3.1773e-04
Loss = 4.0112e-01, PNorm = 60.3775, GNorm = 1.8396, lr_0 = 3.1751e-04
Loss = 3.9890e-01, PNorm = 60.3839, GNorm = 1.5013, lr_0 = 3.1729e-04
Loss = 4.1766e-01, PNorm = 60.3920, GNorm = 1.6204, lr_0 = 3.1707e-04
Loss = 3.3225e-01, PNorm = 60.3964, GNorm = 1.2050, lr_0 = 3.1686e-04
Loss = 4.2222e-01, PNorm = 60.4012, GNorm = 1.0835, lr_0 = 3.1664e-04
Loss = 3.5080e-01, PNorm = 60.4081, GNorm = 1.0440, lr_0 = 3.1642e-04
Loss = 4.0507e-01, PNorm = 60.4097, GNorm = 0.8271, lr_0 = 3.1621e-04
Validation mae = 0.114224
Epoch 16
Loss = 3.9162e-01, PNorm = 60.4133, GNorm = 1.3959, lr_0 = 3.1599e-04
Loss = 4.7250e-01, PNorm = 60.4151, GNorm = 1.8704, lr_0 = 3.1577e-04
Loss = 4.0741e-01, PNorm = 60.4190, GNorm = 1.4617, lr_0 = 3.1556e-04
Loss = 3.5705e-01, PNorm = 60.4232, GNorm = 1.0266, lr_0 = 3.1534e-04
Loss = 3.6843e-01, PNorm = 60.4314, GNorm = 0.9469, lr_0 = 3.1512e-04
Loss = 3.9580e-01, PNorm = 60.4338, GNorm = 1.4012, lr_0 = 3.1491e-04
Loss = 3.5449e-01, PNorm = 60.4393, GNorm = 2.0591, lr_0 = 3.1469e-04
Loss = 3.8806e-01, PNorm = 60.4410, GNorm = 2.1652, lr_0 = 3.1448e-04
Loss = 3.8363e-01, PNorm = 60.4416, GNorm = 1.0981, lr_0 = 3.1426e-04
Loss = 4.0666e-01, PNorm = 60.4465, GNorm = 1.5844, lr_0 = 3.1405e-04
Loss = 4.0017e-01, PNorm = 60.4469, GNorm = 1.5725, lr_0 = 3.1383e-04
Loss = 3.4867e-01, PNorm = 60.4522, GNorm = 1.5387, lr_0 = 3.1362e-04
Loss = 3.7379e-01, PNorm = 60.4590, GNorm = 1.1048, lr_0 = 3.1340e-04
Loss = 4.1261e-01, PNorm = 60.4602, GNorm = 1.3713, lr_0 = 3.1319e-04
Loss = 3.4748e-01, PNorm = 60.4666, GNorm = 1.4578, lr_0 = 3.1297e-04
Loss = 3.7014e-01, PNorm = 60.4692, GNorm = 1.0111, lr_0 = 3.1276e-04
Loss = 3.4638e-01, PNorm = 60.4742, GNorm = 1.5644, lr_0 = 3.1254e-04
Loss = 3.8738e-01, PNorm = 60.4768, GNorm = 1.3663, lr_0 = 3.1233e-04
Loss = 3.6307e-01, PNorm = 60.4817, GNorm = 1.2950, lr_0 = 3.1212e-04
Loss = 4.0471e-01, PNorm = 60.4831, GNorm = 1.9940, lr_0 = 3.1190e-04
Loss = 3.4857e-01, PNorm = 60.4865, GNorm = 1.2567, lr_0 = 3.1169e-04
Loss = 3.8295e-01, PNorm = 60.4906, GNorm = 1.2043, lr_0 = 3.1147e-04
Loss = 3.8356e-01, PNorm = 60.4952, GNorm = 1.6910, lr_0 = 3.1126e-04
Loss = 3.8136e-01, PNorm = 60.4943, GNorm = 1.2530, lr_0 = 3.1105e-04
Loss = 3.6224e-01, PNorm = 60.5028, GNorm = 1.2970, lr_0 = 3.1083e-04
Loss = 3.7481e-01, PNorm = 60.5071, GNorm = 1.2035, lr_0 = 3.1062e-04
Loss = 3.9283e-01, PNorm = 60.5104, GNorm = 1.6741, lr_0 = 3.1041e-04
Loss = 3.4830e-01, PNorm = 60.5162, GNorm = 1.3290, lr_0 = 3.1020e-04
Loss = 3.6535e-01, PNorm = 60.5195, GNorm = 0.8910, lr_0 = 3.0998e-04
Loss = 4.2812e-01, PNorm = 60.5252, GNorm = 1.5131, lr_0 = 3.0977e-04
Loss = 3.8037e-01, PNorm = 60.5300, GNorm = 1.7403, lr_0 = 3.0956e-04
Loss = 3.9194e-01, PNorm = 60.5326, GNorm = 1.5574, lr_0 = 3.0935e-04
Loss = 3.5509e-01, PNorm = 60.5373, GNorm = 1.7047, lr_0 = 3.0914e-04
Loss = 3.9832e-01, PNorm = 60.5446, GNorm = 1.4435, lr_0 = 3.0892e-04
Loss = 3.6370e-01, PNorm = 60.5498, GNorm = 1.2234, lr_0 = 3.0871e-04
Loss = 3.9623e-01, PNorm = 60.5507, GNorm = 1.8475, lr_0 = 3.0850e-04
Loss = 3.3969e-01, PNorm = 60.5503, GNorm = 1.0420, lr_0 = 3.0829e-04
Loss = 4.2080e-01, PNorm = 60.5489, GNorm = 1.2984, lr_0 = 3.0808e-04
Loss = 3.2154e-01, PNorm = 60.5503, GNorm = 1.4506, lr_0 = 3.0787e-04
Loss = 3.1503e-01, PNorm = 60.5574, GNorm = 1.9870, lr_0 = 3.0766e-04
Loss = 3.3305e-01, PNorm = 60.5608, GNorm = 1.3240, lr_0 = 3.0745e-04
Loss = 3.9295e-01, PNorm = 60.5661, GNorm = 1.7265, lr_0 = 3.0723e-04
Loss = 3.7907e-01, PNorm = 60.5695, GNorm = 0.9167, lr_0 = 3.0702e-04
Loss = 3.7964e-01, PNorm = 60.5706, GNorm = 1.0647, lr_0 = 3.0681e-04
Loss = 3.9867e-01, PNorm = 60.5780, GNorm = 1.2736, lr_0 = 3.0660e-04
Loss = 3.5192e-01, PNorm = 60.5853, GNorm = 1.0945, lr_0 = 3.0639e-04
Loss = 3.7157e-01, PNorm = 60.5879, GNorm = 1.2897, lr_0 = 3.0618e-04
Loss = 3.4457e-01, PNorm = 60.5934, GNorm = 1.2052, lr_0 = 3.0597e-04
Loss = 4.7895e-01, PNorm = 60.5979, GNorm = 1.4424, lr_0 = 3.0576e-04
Loss = 3.8291e-01, PNorm = 60.6038, GNorm = 2.2025, lr_0 = 3.0555e-04
Loss = 3.3777e-01, PNorm = 60.6077, GNorm = 1.5434, lr_0 = 3.0535e-04
Loss = 3.9598e-01, PNorm = 60.6130, GNorm = 1.3813, lr_0 = 3.0514e-04
Loss = 4.2130e-01, PNorm = 60.6172, GNorm = 1.5767, lr_0 = 3.0493e-04
Loss = 3.2407e-01, PNorm = 60.6234, GNorm = 1.3679, lr_0 = 3.0472e-04
Loss = 3.5900e-01, PNorm = 60.6225, GNorm = 1.1661, lr_0 = 3.0451e-04
Loss = 3.9125e-01, PNorm = 60.6255, GNorm = 1.2444, lr_0 = 3.0430e-04
Loss = 3.8333e-01, PNorm = 60.6284, GNorm = 2.4351, lr_0 = 3.0409e-04
Loss = 3.7866e-01, PNorm = 60.6335, GNorm = 1.1993, lr_0 = 3.0388e-04
Loss = 3.4155e-01, PNorm = 60.6381, GNorm = 2.1708, lr_0 = 3.0368e-04
Loss = 3.8963e-01, PNorm = 60.6402, GNorm = 2.0237, lr_0 = 3.0347e-04
Loss = 3.9565e-01, PNorm = 60.6428, GNorm = 1.0070, lr_0 = 3.0326e-04
Loss = 3.9987e-01, PNorm = 60.6444, GNorm = 1.4463, lr_0 = 3.0305e-04
Loss = 3.8880e-01, PNorm = 60.6492, GNorm = 1.4486, lr_0 = 3.0284e-04
Loss = 4.0427e-01, PNorm = 60.6498, GNorm = 1.0150, lr_0 = 3.0264e-04
Loss = 3.9709e-01, PNorm = 60.6542, GNorm = 1.2854, lr_0 = 3.0243e-04
Loss = 4.2580e-01, PNorm = 60.6578, GNorm = 1.1815, lr_0 = 3.0222e-04
Loss = 3.8627e-01, PNorm = 60.6624, GNorm = 2.3047, lr_0 = 3.0202e-04
Loss = 3.7768e-01, PNorm = 60.6683, GNorm = 1.2292, lr_0 = 3.0181e-04
Loss = 3.7042e-01, PNorm = 60.6728, GNorm = 1.1105, lr_0 = 3.0160e-04
Loss = 3.9031e-01, PNorm = 60.6776, GNorm = 1.0793, lr_0 = 3.0140e-04
Loss = 3.5621e-01, PNorm = 60.6838, GNorm = 2.1867, lr_0 = 3.0119e-04
Loss = 3.5847e-01, PNorm = 60.6859, GNorm = 1.1300, lr_0 = 3.0098e-04
Loss = 4.0852e-01, PNorm = 60.6894, GNorm = 1.4716, lr_0 = 3.0078e-04
Loss = 4.0975e-01, PNorm = 60.6899, GNorm = 1.4079, lr_0 = 3.0057e-04
Loss = 3.4930e-01, PNorm = 60.6970, GNorm = 0.9337, lr_0 = 3.0036e-04
Loss = 3.0880e-01, PNorm = 60.7022, GNorm = 1.1551, lr_0 = 3.0016e-04
Loss = 3.6038e-01, PNorm = 60.7035, GNorm = 1.3504, lr_0 = 2.9995e-04
Loss = 3.4992e-01, PNorm = 60.7088, GNorm = 1.4947, lr_0 = 2.9975e-04
Loss = 3.6420e-01, PNorm = 60.7088, GNorm = 1.4340, lr_0 = 2.9954e-04
Loss = 3.8599e-01, PNorm = 60.7123, GNorm = 0.8132, lr_0 = 2.9934e-04
Loss = 4.0657e-01, PNorm = 60.7210, GNorm = 1.4639, lr_0 = 2.9913e-04
Loss = 3.5921e-01, PNorm = 60.7270, GNorm = 1.4464, lr_0 = 2.9893e-04
Loss = 3.5062e-01, PNorm = 60.7313, GNorm = 1.1621, lr_0 = 2.9872e-04
Loss = 3.4891e-01, PNorm = 60.7371, GNorm = 1.1370, lr_0 = 2.9852e-04
Loss = 3.5337e-01, PNorm = 60.7419, GNorm = 1.3737, lr_0 = 2.9831e-04
Loss = 3.8893e-01, PNorm = 60.7450, GNorm = 1.4057, lr_0 = 2.9811e-04
Loss = 3.7399e-01, PNorm = 60.7472, GNorm = 1.2570, lr_0 = 2.9790e-04
Loss = 3.7399e-01, PNorm = 60.7496, GNorm = 1.8892, lr_0 = 2.9770e-04
Loss = 3.7401e-01, PNorm = 60.7522, GNorm = 1.0384, lr_0 = 2.9750e-04
Loss = 3.6965e-01, PNorm = 60.7569, GNorm = 1.4634, lr_0 = 2.9729e-04
Loss = 3.8654e-01, PNorm = 60.7585, GNorm = 2.0705, lr_0 = 2.9709e-04
Loss = 4.1729e-01, PNorm = 60.7646, GNorm = 1.3979, lr_0 = 2.9689e-04
Loss = 3.9069e-01, PNorm = 60.7679, GNorm = 1.3837, lr_0 = 2.9668e-04
Loss = 3.5729e-01, PNorm = 60.7693, GNorm = 1.1909, lr_0 = 2.9648e-04
Loss = 3.7062e-01, PNorm = 60.7722, GNorm = 1.0546, lr_0 = 2.9628e-04
Loss = 3.5419e-01, PNorm = 60.7756, GNorm = 0.9967, lr_0 = 2.9607e-04
Loss = 3.6379e-01, PNorm = 60.7762, GNorm = 1.6388, lr_0 = 2.9587e-04
Loss = 4.5055e-01, PNorm = 60.7787, GNorm = 1.2000, lr_0 = 2.9567e-04
Loss = 3.5975e-01, PNorm = 60.7787, GNorm = 1.2600, lr_0 = 2.9546e-04
Loss = 3.4428e-01, PNorm = 60.7839, GNorm = 1.1459, lr_0 = 2.9526e-04
Loss = 3.7861e-01, PNorm = 60.7904, GNorm = 1.5461, lr_0 = 2.9506e-04
Loss = 3.7587e-01, PNorm = 60.7933, GNorm = 1.6405, lr_0 = 2.9486e-04
Loss = 4.2029e-01, PNorm = 60.8022, GNorm = 1.4697, lr_0 = 2.9466e-04
Loss = 4.2984e-01, PNorm = 60.8033, GNorm = 1.6172, lr_0 = 2.9445e-04
Loss = 3.6667e-01, PNorm = 60.8056, GNorm = 2.0366, lr_0 = 2.9425e-04
Loss = 3.8857e-01, PNorm = 60.8106, GNorm = 1.3694, lr_0 = 2.9405e-04
Loss = 3.8515e-01, PNorm = 60.8121, GNorm = 1.1318, lr_0 = 2.9385e-04
Loss = 3.8206e-01, PNorm = 60.8165, GNorm = 1.5899, lr_0 = 2.9365e-04
Loss = 3.9871e-01, PNorm = 60.8224, GNorm = 1.3964, lr_0 = 2.9345e-04
Loss = 3.4010e-01, PNorm = 60.8240, GNorm = 1.3480, lr_0 = 2.9325e-04
Loss = 3.9010e-01, PNorm = 60.8266, GNorm = 1.3137, lr_0 = 2.9305e-04
Loss = 4.0771e-01, PNorm = 60.8315, GNorm = 1.0826, lr_0 = 2.9284e-04
Loss = 3.8659e-01, PNorm = 60.8347, GNorm = 1.4802, lr_0 = 2.9264e-04
Loss = 3.4109e-01, PNorm = 60.8391, GNorm = 1.0502, lr_0 = 2.9244e-04
Loss = 3.5352e-01, PNorm = 60.8421, GNorm = 1.5170, lr_0 = 2.9224e-04
Loss = 4.1170e-01, PNorm = 60.8472, GNorm = 1.0747, lr_0 = 2.9204e-04
Loss = 3.8006e-01, PNorm = 60.8505, GNorm = 1.6083, lr_0 = 2.9184e-04
Loss = 4.3264e-01, PNorm = 60.8529, GNorm = 2.3391, lr_0 = 2.9164e-04
Loss = 4.3418e-01, PNorm = 60.8539, GNorm = 1.9016, lr_0 = 2.9144e-04
Loss = 3.6900e-01, PNorm = 60.8609, GNorm = 1.2983, lr_0 = 2.9124e-04
Validation mae = 0.115110
Epoch 17
Loss = 4.0327e-01, PNorm = 60.8604, GNorm = 1.0452, lr_0 = 2.9104e-04
Loss = 3.7244e-01, PNorm = 60.8655, GNorm = 1.0879, lr_0 = 2.9084e-04
Loss = 3.1891e-01, PNorm = 60.8693, GNorm = 1.3983, lr_0 = 2.9065e-04
Loss = 4.2458e-01, PNorm = 60.8737, GNorm = 1.6223, lr_0 = 2.9045e-04
Loss = 3.5120e-01, PNorm = 60.8768, GNorm = 1.7498, lr_0 = 2.9025e-04
Loss = 3.7914e-01, PNorm = 60.8799, GNorm = 1.6806, lr_0 = 2.9005e-04
Loss = 4.1072e-01, PNorm = 60.8857, GNorm = 2.6931, lr_0 = 2.8985e-04
Loss = 4.0345e-01, PNorm = 60.8917, GNorm = 1.6204, lr_0 = 2.8965e-04
Loss = 3.5338e-01, PNorm = 60.9006, GNorm = 1.5315, lr_0 = 2.8945e-04
Loss = 4.0798e-01, PNorm = 60.8991, GNorm = 1.6810, lr_0 = 2.8925e-04
Loss = 3.4057e-01, PNorm = 60.9027, GNorm = 1.1087, lr_0 = 2.8906e-04
Loss = 3.5881e-01, PNorm = 60.9097, GNorm = 1.1383, lr_0 = 2.8886e-04
Loss = 3.7738e-01, PNorm = 60.9089, GNorm = 1.1965, lr_0 = 2.8866e-04
Loss = 3.8070e-01, PNorm = 60.9136, GNorm = 1.2457, lr_0 = 2.8846e-04
Loss = 3.8784e-01, PNorm = 60.9211, GNorm = 1.2899, lr_0 = 2.8826e-04
Loss = 3.8597e-01, PNorm = 60.9271, GNorm = 1.2546, lr_0 = 2.8807e-04
Loss = 3.6110e-01, PNorm = 60.9284, GNorm = 1.3011, lr_0 = 2.8787e-04
Loss = 3.4249e-01, PNorm = 60.9315, GNorm = 1.0915, lr_0 = 2.8767e-04
Loss = 3.8477e-01, PNorm = 60.9346, GNorm = 1.5658, lr_0 = 2.8748e-04
Loss = 4.0593e-01, PNorm = 60.9372, GNorm = 2.2068, lr_0 = 2.8728e-04
Loss = 4.3776e-01, PNorm = 60.9368, GNorm = 2.2714, lr_0 = 2.8708e-04
Loss = 3.8697e-01, PNorm = 60.9458, GNorm = 1.4223, lr_0 = 2.8689e-04
Loss = 3.7626e-01, PNorm = 60.9513, GNorm = 1.2182, lr_0 = 2.8669e-04
Loss = 4.6073e-01, PNorm = 60.9573, GNorm = 1.6011, lr_0 = 2.8649e-04
Loss = 3.8605e-01, PNorm = 60.9583, GNorm = 1.3748, lr_0 = 2.8630e-04
Loss = 3.7142e-01, PNorm = 60.9599, GNorm = 1.4402, lr_0 = 2.8610e-04
Loss = 3.4762e-01, PNorm = 60.9626, GNorm = 1.4872, lr_0 = 2.8590e-04
Loss = 3.5843e-01, PNorm = 60.9675, GNorm = 0.9370, lr_0 = 2.8571e-04
Loss = 4.1138e-01, PNorm = 60.9747, GNorm = 1.3722, lr_0 = 2.8551e-04
Loss = 3.4149e-01, PNorm = 60.9856, GNorm = 1.9294, lr_0 = 2.8532e-04
Loss = 3.7318e-01, PNorm = 60.9893, GNorm = 1.5520, lr_0 = 2.8512e-04
Loss = 3.6609e-01, PNorm = 60.9930, GNorm = 1.1393, lr_0 = 2.8493e-04
Loss = 3.5550e-01, PNorm = 60.9910, GNorm = 1.3937, lr_0 = 2.8473e-04
Loss = 3.5811e-01, PNorm = 60.9942, GNorm = 1.0100, lr_0 = 2.8454e-04
Loss = 3.8154e-01, PNorm = 60.9996, GNorm = 1.6258, lr_0 = 2.8434e-04
Loss = 3.5208e-01, PNorm = 61.0036, GNorm = 1.5630, lr_0 = 2.8415e-04
Loss = 4.1278e-01, PNorm = 61.0038, GNorm = 1.1516, lr_0 = 2.8395e-04
Loss = 4.3335e-01, PNorm = 61.0080, GNorm = 1.5032, lr_0 = 2.8376e-04
Loss = 3.4802e-01, PNorm = 61.0126, GNorm = 1.2448, lr_0 = 2.8356e-04
Loss = 3.3966e-01, PNorm = 61.0139, GNorm = 1.0510, lr_0 = 2.8337e-04
Loss = 3.5771e-01, PNorm = 61.0148, GNorm = 1.2517, lr_0 = 2.8317e-04
Loss = 4.3314e-01, PNorm = 61.0172, GNorm = 1.4758, lr_0 = 2.8298e-04
Loss = 3.2954e-01, PNorm = 61.0210, GNorm = 1.2153, lr_0 = 2.8279e-04
Loss = 3.4810e-01, PNorm = 61.0230, GNorm = 1.3302, lr_0 = 2.8259e-04
Loss = 3.9724e-01, PNorm = 61.0277, GNorm = 1.2853, lr_0 = 2.8240e-04
Loss = 3.8857e-01, PNorm = 61.0338, GNorm = 1.4229, lr_0 = 2.8221e-04
Loss = 3.3186e-01, PNorm = 61.0352, GNorm = 1.3180, lr_0 = 2.8201e-04
Loss = 3.4438e-01, PNorm = 61.0357, GNorm = 1.4135, lr_0 = 2.8182e-04
Loss = 3.5393e-01, PNorm = 61.0432, GNorm = 1.5681, lr_0 = 2.8163e-04
Loss = 3.5920e-01, PNorm = 61.0498, GNorm = 1.6652, lr_0 = 2.8143e-04
Loss = 4.1055e-01, PNorm = 61.0525, GNorm = 1.4655, lr_0 = 2.8124e-04
Loss = 3.7612e-01, PNorm = 61.0517, GNorm = 1.6714, lr_0 = 2.8105e-04
Loss = 3.3941e-01, PNorm = 61.0512, GNorm = 1.2132, lr_0 = 2.8085e-04
Loss = 4.3357e-01, PNorm = 61.0588, GNorm = 1.5586, lr_0 = 2.8066e-04
Loss = 3.7554e-01, PNorm = 61.0618, GNorm = 1.9520, lr_0 = 2.8047e-04
Loss = 3.9255e-01, PNorm = 61.0645, GNorm = 1.4706, lr_0 = 2.8028e-04
Loss = 3.6561e-01, PNorm = 61.0700, GNorm = 1.2173, lr_0 = 2.8009e-04
Loss = 3.4546e-01, PNorm = 61.0707, GNorm = 1.1850, lr_0 = 2.7989e-04
Loss = 3.1740e-01, PNorm = 61.0741, GNorm = 2.6897, lr_0 = 2.7970e-04
Loss = 4.0035e-01, PNorm = 61.0762, GNorm = 1.2405, lr_0 = 2.7951e-04
Loss = 4.0589e-01, PNorm = 61.0761, GNorm = 1.6053, lr_0 = 2.7932e-04
Loss = 3.9437e-01, PNorm = 61.0841, GNorm = 1.8161, lr_0 = 2.7913e-04
Loss = 4.1244e-01, PNorm = 61.0900, GNorm = 1.5905, lr_0 = 2.7894e-04
Loss = 3.7592e-01, PNorm = 61.0928, GNorm = 1.1822, lr_0 = 2.7875e-04
Loss = 3.4539e-01, PNorm = 61.0961, GNorm = 1.1052, lr_0 = 2.7855e-04
Loss = 3.8708e-01, PNorm = 61.0982, GNorm = 1.3268, lr_0 = 2.7836e-04
Loss = 3.5244e-01, PNorm = 61.0976, GNorm = 1.4864, lr_0 = 2.7817e-04
Loss = 4.2553e-01, PNorm = 61.1053, GNorm = 1.0325, lr_0 = 2.7798e-04
Loss = 3.5304e-01, PNorm = 61.1091, GNorm = 1.1322, lr_0 = 2.7779e-04
Loss = 3.7317e-01, PNorm = 61.1132, GNorm = 1.6288, lr_0 = 2.7760e-04
Loss = 3.3846e-01, PNorm = 61.1181, GNorm = 1.3037, lr_0 = 2.7741e-04
Loss = 3.7936e-01, PNorm = 61.1243, GNorm = 1.6030, lr_0 = 2.7722e-04
Loss = 3.4993e-01, PNorm = 61.1288, GNorm = 1.4117, lr_0 = 2.7703e-04
Loss = 3.9033e-01, PNorm = 61.1350, GNorm = 1.3514, lr_0 = 2.7684e-04
Loss = 4.2563e-01, PNorm = 61.1330, GNorm = 1.4375, lr_0 = 2.7665e-04
Loss = 3.8712e-01, PNorm = 61.1369, GNorm = 1.2972, lr_0 = 2.7646e-04
Loss = 3.3344e-01, PNorm = 61.1383, GNorm = 1.7453, lr_0 = 2.7627e-04
Loss = 3.8753e-01, PNorm = 61.1420, GNorm = 1.9007, lr_0 = 2.7608e-04
Loss = 3.6252e-01, PNorm = 61.1459, GNorm = 1.4843, lr_0 = 2.7590e-04
Loss = 4.0400e-01, PNorm = 61.1479, GNorm = 1.7766, lr_0 = 2.7571e-04
Loss = 4.0837e-01, PNorm = 61.1514, GNorm = 1.5224, lr_0 = 2.7552e-04
Loss = 3.7932e-01, PNorm = 61.1505, GNorm = 1.4850, lr_0 = 2.7533e-04
Loss = 3.6036e-01, PNorm = 61.1528, GNorm = 1.4518, lr_0 = 2.7514e-04
Loss = 3.5727e-01, PNorm = 61.1561, GNorm = 1.4275, lr_0 = 2.7495e-04
Loss = 3.8490e-01, PNorm = 61.1611, GNorm = 1.2424, lr_0 = 2.7476e-04
Loss = 3.9498e-01, PNorm = 61.1633, GNorm = 1.7761, lr_0 = 2.7457e-04
Loss = 3.5984e-01, PNorm = 61.1653, GNorm = 1.4899, lr_0 = 2.7439e-04
Loss = 3.4387e-01, PNorm = 61.1717, GNorm = 1.1771, lr_0 = 2.7420e-04
Loss = 3.9161e-01, PNorm = 61.1715, GNorm = 1.4674, lr_0 = 2.7401e-04
Loss = 4.4995e-01, PNorm = 61.1743, GNorm = 1.3920, lr_0 = 2.7382e-04
Loss = 4.3411e-01, PNorm = 61.1748, GNorm = 2.1473, lr_0 = 2.7364e-04
Loss = 3.5370e-01, PNorm = 61.1775, GNorm = 1.1525, lr_0 = 2.7345e-04
Loss = 3.6876e-01, PNorm = 61.1847, GNorm = 1.5987, lr_0 = 2.7326e-04
Loss = 3.8815e-01, PNorm = 61.1881, GNorm = 1.5967, lr_0 = 2.7307e-04
Loss = 4.0092e-01, PNorm = 61.1945, GNorm = 1.5281, lr_0 = 2.7289e-04
Loss = 3.5596e-01, PNorm = 61.1966, GNorm = 1.6810, lr_0 = 2.7270e-04
Loss = 3.9798e-01, PNorm = 61.1998, GNorm = 1.6863, lr_0 = 2.7251e-04
Loss = 3.6857e-01, PNorm = 61.2025, GNorm = 1.2262, lr_0 = 2.7233e-04
Loss = 3.3520e-01, PNorm = 61.2080, GNorm = 1.1694, lr_0 = 2.7214e-04
Loss = 4.3086e-01, PNorm = 61.2127, GNorm = 1.6076, lr_0 = 2.7195e-04
Loss = 3.8600e-01, PNorm = 61.2122, GNorm = 1.1741, lr_0 = 2.7177e-04
Loss = 3.3548e-01, PNorm = 61.2164, GNorm = 1.6415, lr_0 = 2.7158e-04
Loss = 3.3017e-01, PNorm = 61.2218, GNorm = 1.2644, lr_0 = 2.7139e-04
Loss = 3.6918e-01, PNorm = 61.2235, GNorm = 1.6258, lr_0 = 2.7121e-04
Loss = 4.6501e-01, PNorm = 61.2292, GNorm = 1.1801, lr_0 = 2.7102e-04
Loss = 3.8870e-01, PNorm = 61.2316, GNorm = 2.0518, lr_0 = 2.7084e-04
Loss = 3.9442e-01, PNorm = 61.2328, GNorm = 1.7302, lr_0 = 2.7065e-04
Loss = 3.7071e-01, PNorm = 61.2371, GNorm = 1.4119, lr_0 = 2.7047e-04
Loss = 4.1275e-01, PNorm = 61.2427, GNorm = 1.1416, lr_0 = 2.7028e-04
Loss = 3.9396e-01, PNorm = 61.2446, GNorm = 1.0948, lr_0 = 2.7010e-04
Loss = 3.7830e-01, PNorm = 61.2495, GNorm = 1.3088, lr_0 = 2.6991e-04
Loss = 3.6774e-01, PNorm = 61.2534, GNorm = 1.2320, lr_0 = 2.6973e-04
Loss = 3.5798e-01, PNorm = 61.2534, GNorm = 1.2863, lr_0 = 2.6954e-04
Loss = 4.0079e-01, PNorm = 61.2564, GNorm = 1.4359, lr_0 = 2.6936e-04
Loss = 3.8755e-01, PNorm = 61.2619, GNorm = 1.3444, lr_0 = 2.6917e-04
Loss = 3.4016e-01, PNorm = 61.2648, GNorm = 1.2808, lr_0 = 2.6899e-04
Loss = 3.5744e-01, PNorm = 61.2657, GNorm = 1.3849, lr_0 = 2.6880e-04
Loss = 3.7216e-01, PNorm = 61.2697, GNorm = 1.5632, lr_0 = 2.6862e-04
Loss = 3.3422e-01, PNorm = 61.2744, GNorm = 1.6661, lr_0 = 2.6844e-04
Loss = 3.6939e-01, PNorm = 61.2769, GNorm = 1.3337, lr_0 = 2.6825e-04
Validation mae = 0.111728
Epoch 18
Loss = 3.5359e-01, PNorm = 61.2800, GNorm = 1.0991, lr_0 = 2.6807e-04
Loss = 3.4234e-01, PNorm = 61.2815, GNorm = 1.3257, lr_0 = 2.6788e-04
Loss = 3.5226e-01, PNorm = 61.2806, GNorm = 1.3004, lr_0 = 2.6770e-04
Loss = 3.9143e-01, PNorm = 61.2816, GNorm = 1.5600, lr_0 = 2.6752e-04
Loss = 3.4658e-01, PNorm = 61.2884, GNorm = 1.4099, lr_0 = 2.6733e-04
Loss = 4.1761e-01, PNorm = 61.2893, GNorm = 1.5328, lr_0 = 2.6715e-04
Loss = 3.6188e-01, PNorm = 61.2963, GNorm = 1.5252, lr_0 = 2.6697e-04
Loss = 3.4856e-01, PNorm = 61.3004, GNorm = 1.2070, lr_0 = 2.6678e-04
Loss = 3.2659e-01, PNorm = 61.2969, GNorm = 1.7022, lr_0 = 2.6660e-04
Loss = 3.8095e-01, PNorm = 61.3046, GNorm = 1.3915, lr_0 = 2.6642e-04
Loss = 3.4550e-01, PNorm = 61.3108, GNorm = 1.1625, lr_0 = 2.6624e-04
Loss = 3.8340e-01, PNorm = 61.3147, GNorm = 0.9264, lr_0 = 2.6605e-04
Loss = 3.3041e-01, PNorm = 61.3192, GNorm = 1.0645, lr_0 = 2.6587e-04
Loss = 3.4289e-01, PNorm = 61.3261, GNorm = 1.3857, lr_0 = 2.6569e-04
Loss = 3.8470e-01, PNorm = 61.3281, GNorm = 2.1547, lr_0 = 2.6551e-04
Loss = 3.3035e-01, PNorm = 61.3316, GNorm = 1.0273, lr_0 = 2.6533e-04
Loss = 3.9501e-01, PNorm = 61.3337, GNorm = 1.9843, lr_0 = 2.6514e-04
Loss = 3.7433e-01, PNorm = 61.3335, GNorm = 1.0222, lr_0 = 2.6496e-04
Loss = 3.3488e-01, PNorm = 61.3375, GNorm = 1.2023, lr_0 = 2.6478e-04
Loss = 3.6263e-01, PNorm = 61.3432, GNorm = 2.0088, lr_0 = 2.6460e-04
Loss = 3.7696e-01, PNorm = 61.3468, GNorm = 1.4253, lr_0 = 2.6442e-04
Loss = 4.3076e-01, PNorm = 61.3510, GNorm = 1.8968, lr_0 = 2.6424e-04
Loss = 3.3440e-01, PNorm = 61.3566, GNorm = 2.0212, lr_0 = 2.6406e-04
Loss = 3.5812e-01, PNorm = 61.3592, GNorm = 2.5991, lr_0 = 2.6388e-04
Loss = 4.2672e-01, PNorm = 61.3600, GNorm = 2.2561, lr_0 = 2.6369e-04
Loss = 3.5344e-01, PNorm = 61.3660, GNorm = 1.7115, lr_0 = 2.6351e-04
Loss = 3.2696e-01, PNorm = 61.3655, GNorm = 1.6206, lr_0 = 2.6333e-04
Loss = 3.6887e-01, PNorm = 61.3717, GNorm = 1.3444, lr_0 = 2.6315e-04
Loss = 3.5090e-01, PNorm = 61.3764, GNorm = 1.7638, lr_0 = 2.6297e-04
Loss = 3.4616e-01, PNorm = 61.3811, GNorm = 1.2280, lr_0 = 2.6279e-04
Loss = 3.2834e-01, PNorm = 61.3848, GNorm = 2.3735, lr_0 = 2.6261e-04
Loss = 4.1933e-01, PNorm = 61.3900, GNorm = 1.3676, lr_0 = 2.6243e-04
Loss = 3.8874e-01, PNorm = 61.3929, GNorm = 1.5677, lr_0 = 2.6225e-04
Loss = 4.2313e-01, PNorm = 61.3954, GNorm = 1.2694, lr_0 = 2.6207e-04
Loss = 3.6026e-01, PNorm = 61.4009, GNorm = 1.5109, lr_0 = 2.6189e-04
Loss = 4.2003e-01, PNorm = 61.4080, GNorm = 1.5838, lr_0 = 2.6171e-04
Loss = 3.5047e-01, PNorm = 61.4106, GNorm = 1.2399, lr_0 = 2.6153e-04
Loss = 3.5159e-01, PNorm = 61.4108, GNorm = 1.0985, lr_0 = 2.6136e-04
Loss = 3.9203e-01, PNorm = 61.4143, GNorm = 1.7134, lr_0 = 2.6118e-04
Loss = 3.5213e-01, PNorm = 61.4186, GNorm = 1.1286, lr_0 = 2.6100e-04
Loss = 3.6869e-01, PNorm = 61.4203, GNorm = 1.4913, lr_0 = 2.6082e-04
Loss = 3.9868e-01, PNorm = 61.4222, GNorm = 1.7033, lr_0 = 2.6064e-04
Loss = 3.2628e-01, PNorm = 61.4240, GNorm = 1.3638, lr_0 = 2.6046e-04
Loss = 3.4605e-01, PNorm = 61.4255, GNorm = 1.2219, lr_0 = 2.6028e-04
Loss = 3.9294e-01, PNorm = 61.4305, GNorm = 1.4358, lr_0 = 2.6011e-04
Loss = 3.9854e-01, PNorm = 61.4337, GNorm = 1.6435, lr_0 = 2.5993e-04
Loss = 3.3911e-01, PNorm = 61.4389, GNorm = 1.1723, lr_0 = 2.5975e-04
Loss = 3.4000e-01, PNorm = 61.4418, GNorm = 1.5268, lr_0 = 2.5957e-04
Loss = 4.0616e-01, PNorm = 61.4459, GNorm = 1.8922, lr_0 = 2.5939e-04
Loss = 3.6158e-01, PNorm = 61.4481, GNorm = 1.3489, lr_0 = 2.5922e-04
Loss = 3.4783e-01, PNorm = 61.4521, GNorm = 1.2983, lr_0 = 2.5904e-04
Loss = 4.1031e-01, PNorm = 61.4574, GNorm = 1.4155, lr_0 = 2.5886e-04
Loss = 3.6626e-01, PNorm = 61.4597, GNorm = 1.5071, lr_0 = 2.5868e-04
Loss = 3.7221e-01, PNorm = 61.4638, GNorm = 1.5724, lr_0 = 2.5851e-04
Loss = 3.6484e-01, PNorm = 61.4662, GNorm = 1.3209, lr_0 = 2.5833e-04
Loss = 3.4961e-01, PNorm = 61.4724, GNorm = 1.6034, lr_0 = 2.5815e-04
Loss = 3.8901e-01, PNorm = 61.4741, GNorm = 1.1167, lr_0 = 2.5797e-04
Loss = 4.2184e-01, PNorm = 61.4758, GNorm = 1.8282, lr_0 = 2.5780e-04
Loss = 4.1276e-01, PNorm = 61.4803, GNorm = 1.5371, lr_0 = 2.5762e-04
Loss = 3.7682e-01, PNorm = 61.4845, GNorm = 1.8580, lr_0 = 2.5745e-04
Loss = 4.6344e-01, PNorm = 61.4827, GNorm = 1.2605, lr_0 = 2.5727e-04
Loss = 3.4741e-01, PNorm = 61.4847, GNorm = 1.8928, lr_0 = 2.5709e-04
Loss = 3.7704e-01, PNorm = 61.4866, GNorm = 1.3473, lr_0 = 2.5692e-04
Loss = 3.6516e-01, PNorm = 61.4887, GNorm = 1.7159, lr_0 = 2.5674e-04
Loss = 4.0103e-01, PNorm = 61.4905, GNorm = 1.7102, lr_0 = 2.5656e-04
Loss = 3.7266e-01, PNorm = 61.4970, GNorm = 1.1183, lr_0 = 2.5639e-04
Loss = 3.7497e-01, PNorm = 61.4991, GNorm = 1.4067, lr_0 = 2.5621e-04
Loss = 3.5635e-01, PNorm = 61.5023, GNorm = 1.4944, lr_0 = 2.5604e-04
Loss = 3.9204e-01, PNorm = 61.5042, GNorm = 1.2729, lr_0 = 2.5586e-04
Loss = 4.0575e-01, PNorm = 61.5094, GNorm = 1.1526, lr_0 = 2.5569e-04
Loss = 3.4309e-01, PNorm = 61.5136, GNorm = 1.9293, lr_0 = 2.5551e-04
Loss = 3.7158e-01, PNorm = 61.5118, GNorm = 1.2015, lr_0 = 2.5534e-04
Loss = 4.2808e-01, PNorm = 61.5160, GNorm = 1.0628, lr_0 = 2.5516e-04
Loss = 3.7872e-01, PNorm = 61.5190, GNorm = 1.6360, lr_0 = 2.5499e-04
Loss = 3.5332e-01, PNorm = 61.5246, GNorm = 1.4704, lr_0 = 2.5481e-04
Loss = 3.8645e-01, PNorm = 61.5261, GNorm = 1.6589, lr_0 = 2.5464e-04
Loss = 4.2379e-01, PNorm = 61.5272, GNorm = 1.5729, lr_0 = 2.5446e-04
Loss = 3.7207e-01, PNorm = 61.5311, GNorm = 1.3477, lr_0 = 2.5429e-04
Loss = 3.2128e-01, PNorm = 61.5327, GNorm = 1.1058, lr_0 = 2.5411e-04
Loss = 3.6052e-01, PNorm = 61.5374, GNorm = 1.5272, lr_0 = 2.5394e-04
Loss = 3.6227e-01, PNorm = 61.5427, GNorm = 1.9609, lr_0 = 2.5377e-04
Loss = 3.7494e-01, PNorm = 61.5442, GNorm = 1.3780, lr_0 = 2.5359e-04
Loss = 3.5348e-01, PNorm = 61.5475, GNorm = 1.2383, lr_0 = 2.5342e-04
Loss = 3.6094e-01, PNorm = 61.5490, GNorm = 1.3245, lr_0 = 2.5325e-04
Loss = 3.8222e-01, PNorm = 61.5514, GNorm = 1.4460, lr_0 = 2.5307e-04
Loss = 3.4783e-01, PNorm = 61.5550, GNorm = 1.0960, lr_0 = 2.5290e-04
Loss = 3.9086e-01, PNorm = 61.5606, GNorm = 1.4619, lr_0 = 2.5273e-04
Loss = 3.3010e-01, PNorm = 61.5655, GNorm = 1.4838, lr_0 = 2.5255e-04
Loss = 3.4810e-01, PNorm = 61.5634, GNorm = 1.2412, lr_0 = 2.5238e-04
Loss = 3.6695e-01, PNorm = 61.5647, GNorm = 1.5462, lr_0 = 2.5221e-04
Loss = 3.6874e-01, PNorm = 61.5680, GNorm = 1.4547, lr_0 = 2.5203e-04
Loss = 3.6682e-01, PNorm = 61.5716, GNorm = 1.6137, lr_0 = 2.5186e-04
Loss = 4.7433e-01, PNorm = 61.5767, GNorm = 0.8850, lr_0 = 2.5169e-04
Loss = 3.1242e-01, PNorm = 61.5815, GNorm = 1.6236, lr_0 = 2.5152e-04
Loss = 3.5166e-01, PNorm = 61.5812, GNorm = 1.4188, lr_0 = 2.5134e-04
Loss = 4.2451e-01, PNorm = 61.5852, GNorm = 2.2726, lr_0 = 2.5117e-04
Loss = 3.8751e-01, PNorm = 61.5883, GNorm = 1.3374, lr_0 = 2.5100e-04
Loss = 3.4247e-01, PNorm = 61.5919, GNorm = 1.0071, lr_0 = 2.5083e-04
Loss = 3.9481e-01, PNorm = 61.5970, GNorm = 1.1138, lr_0 = 2.5066e-04
Loss = 3.2768e-01, PNorm = 61.6010, GNorm = 1.3015, lr_0 = 2.5048e-04
Loss = 3.6400e-01, PNorm = 61.6058, GNorm = 1.2653, lr_0 = 2.5031e-04
Loss = 3.5551e-01, PNorm = 61.6084, GNorm = 1.4381, lr_0 = 2.5014e-04
Loss = 3.6953e-01, PNorm = 61.6132, GNorm = 0.9404, lr_0 = 2.4997e-04
Loss = 3.9667e-01, PNorm = 61.6171, GNorm = 1.1374, lr_0 = 2.4980e-04
Loss = 3.8744e-01, PNorm = 61.6203, GNorm = 1.2284, lr_0 = 2.4963e-04
Loss = 3.4468e-01, PNorm = 61.6228, GNorm = 1.3427, lr_0 = 2.4946e-04
Loss = 3.1042e-01, PNorm = 61.6245, GNorm = 1.3935, lr_0 = 2.4929e-04
Loss = 3.7561e-01, PNorm = 61.6238, GNorm = 1.8472, lr_0 = 2.4911e-04
Loss = 4.0215e-01, PNorm = 61.6280, GNorm = 1.5991, lr_0 = 2.4894e-04
Loss = 4.1285e-01, PNorm = 61.6269, GNorm = 1.2090, lr_0 = 2.4877e-04
Loss = 3.4173e-01, PNorm = 61.6286, GNorm = 1.0717, lr_0 = 2.4860e-04
Loss = 3.7077e-01, PNorm = 61.6310, GNorm = 1.0824, lr_0 = 2.4843e-04
Loss = 3.5348e-01, PNorm = 61.6333, GNorm = 1.4333, lr_0 = 2.4826e-04
Loss = 3.3941e-01, PNorm = 61.6361, GNorm = 2.0866, lr_0 = 2.4809e-04
Loss = 3.7175e-01, PNorm = 61.6371, GNorm = 1.2777, lr_0 = 2.4792e-04
Loss = 4.0035e-01, PNorm = 61.6396, GNorm = 1.3457, lr_0 = 2.4775e-04
Loss = 3.7741e-01, PNorm = 61.6430, GNorm = 1.9045, lr_0 = 2.4758e-04
Loss = 3.6085e-01, PNorm = 61.6482, GNorm = 1.0672, lr_0 = 2.4741e-04
Loss = 3.4814e-01, PNorm = 61.6503, GNorm = 1.4882, lr_0 = 2.4724e-04
Loss = 4.1405e-01, PNorm = 61.6490, GNorm = 2.2824, lr_0 = 2.4707e-04
Validation mae = 0.113743
Epoch 19
Loss = 3.4274e-01, PNorm = 61.6544, GNorm = 1.7062, lr_0 = 2.4690e-04
Loss = 3.2392e-01, PNorm = 61.6607, GNorm = 1.6500, lr_0 = 2.4674e-04
Loss = 3.6161e-01, PNorm = 61.6615, GNorm = 1.6813, lr_0 = 2.4657e-04
Loss = 3.4962e-01, PNorm = 61.6636, GNorm = 1.6892, lr_0 = 2.4640e-04
Loss = 3.5896e-01, PNorm = 61.6661, GNorm = 1.9174, lr_0 = 2.4623e-04
Loss = 3.7157e-01, PNorm = 61.6663, GNorm = 1.1644, lr_0 = 2.4606e-04
Loss = 4.2730e-01, PNorm = 61.6703, GNorm = 1.6542, lr_0 = 2.4589e-04
Loss = 3.2792e-01, PNorm = 61.6710, GNorm = 1.3902, lr_0 = 2.4572e-04
Loss = 2.9297e-01, PNorm = 61.6722, GNorm = 1.2868, lr_0 = 2.4556e-04
Loss = 3.5101e-01, PNorm = 61.6763, GNorm = 1.3814, lr_0 = 2.4539e-04
Loss = 3.2009e-01, PNorm = 61.6799, GNorm = 1.4582, lr_0 = 2.4522e-04
Loss = 3.6648e-01, PNorm = 61.6821, GNorm = 1.5661, lr_0 = 2.4505e-04
Loss = 3.4045e-01, PNorm = 61.6845, GNorm = 1.7257, lr_0 = 2.4488e-04
Loss = 3.9222e-01, PNorm = 61.6872, GNorm = 1.3616, lr_0 = 2.4472e-04
Loss = 3.9234e-01, PNorm = 61.6893, GNorm = 1.3844, lr_0 = 2.4455e-04
Loss = 3.4969e-01, PNorm = 61.6890, GNorm = 1.5326, lr_0 = 2.4438e-04
Loss = 3.3432e-01, PNorm = 61.6930, GNorm = 1.1932, lr_0 = 2.4421e-04
Loss = 3.4970e-01, PNorm = 61.6975, GNorm = 1.1409, lr_0 = 2.4405e-04
Loss = 2.9212e-01, PNorm = 61.7012, GNorm = 1.6089, lr_0 = 2.4388e-04
Loss = 4.0432e-01, PNorm = 61.7050, GNorm = 1.7428, lr_0 = 2.4371e-04
Loss = 3.6055e-01, PNorm = 61.7074, GNorm = 1.7173, lr_0 = 2.4354e-04
Loss = 3.1648e-01, PNorm = 61.7136, GNorm = 1.6460, lr_0 = 2.4338e-04
Loss = 3.8996e-01, PNorm = 61.7120, GNorm = 1.8560, lr_0 = 2.4321e-04
Loss = 3.5989e-01, PNorm = 61.7148, GNorm = 1.3054, lr_0 = 2.4304e-04
Loss = 3.3923e-01, PNorm = 61.7200, GNorm = 1.1565, lr_0 = 2.4288e-04
Loss = 3.9893e-01, PNorm = 61.7190, GNorm = 1.4182, lr_0 = 2.4271e-04
Loss = 3.7778e-01, PNorm = 61.7217, GNorm = 1.8472, lr_0 = 2.4254e-04
Loss = 3.1896e-01, PNorm = 61.7273, GNorm = 1.3765, lr_0 = 2.4238e-04
Loss = 3.6562e-01, PNorm = 61.7264, GNorm = 1.7160, lr_0 = 2.4221e-04
Loss = 3.5126e-01, PNorm = 61.7343, GNorm = 1.4217, lr_0 = 2.4205e-04
Loss = 3.4836e-01, PNorm = 61.7418, GNorm = 1.3592, lr_0 = 2.4188e-04
Loss = 3.8934e-01, PNorm = 61.7464, GNorm = 1.3011, lr_0 = 2.4171e-04
Loss = 4.1972e-01, PNorm = 61.7455, GNorm = 1.7130, lr_0 = 2.4155e-04
Loss = 3.7533e-01, PNorm = 61.7500, GNorm = 1.3710, lr_0 = 2.4138e-04
Loss = 3.1631e-01, PNorm = 61.7520, GNorm = 1.6069, lr_0 = 2.4122e-04
Loss = 3.3294e-01, PNorm = 61.7547, GNorm = 2.2514, lr_0 = 2.4105e-04
Loss = 3.6576e-01, PNorm = 61.7589, GNorm = 1.8563, lr_0 = 2.4089e-04
Loss = 3.6331e-01, PNorm = 61.7625, GNorm = 1.9893, lr_0 = 2.4072e-04
Loss = 3.4297e-01, PNorm = 61.7655, GNorm = 1.6140, lr_0 = 2.4056e-04
Loss = 3.7590e-01, PNorm = 61.7678, GNorm = 1.5725, lr_0 = 2.4039e-04
Loss = 4.0945e-01, PNorm = 61.7702, GNorm = 1.4766, lr_0 = 2.4023e-04
Loss = 3.8337e-01, PNorm = 61.7718, GNorm = 1.4957, lr_0 = 2.4006e-04
Loss = 3.4928e-01, PNorm = 61.7745, GNorm = 1.8212, lr_0 = 2.3990e-04
Loss = 4.0648e-01, PNorm = 61.7779, GNorm = 1.8062, lr_0 = 2.3974e-04
Loss = 3.7185e-01, PNorm = 61.7800, GNorm = 1.2236, lr_0 = 2.3957e-04
Loss = 4.1975e-01, PNorm = 61.7853, GNorm = 1.8208, lr_0 = 2.3941e-04
Loss = 3.8521e-01, PNorm = 61.7884, GNorm = 1.5038, lr_0 = 2.3924e-04
Loss = 3.5645e-01, PNorm = 61.7945, GNorm = 1.7095, lr_0 = 2.3908e-04
Loss = 3.4092e-01, PNorm = 61.7996, GNorm = 1.5433, lr_0 = 2.3892e-04
Loss = 3.9667e-01, PNorm = 61.8000, GNorm = 1.2945, lr_0 = 2.3875e-04
Loss = 3.6940e-01, PNorm = 61.8043, GNorm = 1.3181, lr_0 = 2.3859e-04
Loss = 3.8637e-01, PNorm = 61.8085, GNorm = 1.6102, lr_0 = 2.3842e-04
Loss = 3.3804e-01, PNorm = 61.8110, GNorm = 1.1886, lr_0 = 2.3826e-04
Loss = 3.1767e-01, PNorm = 61.8118, GNorm = 1.3772, lr_0 = 2.3810e-04
Loss = 3.8805e-01, PNorm = 61.8133, GNorm = 1.1019, lr_0 = 2.3794e-04
Loss = 3.4275e-01, PNorm = 61.8156, GNorm = 0.9958, lr_0 = 2.3777e-04
Loss = 3.7797e-01, PNorm = 61.8193, GNorm = 1.8346, lr_0 = 2.3761e-04
Loss = 3.7316e-01, PNorm = 61.8221, GNorm = 1.5331, lr_0 = 2.3745e-04
Loss = 3.6444e-01, PNorm = 61.8249, GNorm = 1.4042, lr_0 = 2.3728e-04
Loss = 3.5283e-01, PNorm = 61.8259, GNorm = 1.0887, lr_0 = 2.3712e-04
Loss = 3.5406e-01, PNorm = 61.8261, GNorm = 1.4224, lr_0 = 2.3696e-04
Loss = 3.4126e-01, PNorm = 61.8286, GNorm = 1.5351, lr_0 = 2.3680e-04
Loss = 3.8765e-01, PNorm = 61.8308, GNorm = 2.2448, lr_0 = 2.3663e-04
Loss = 3.7200e-01, PNorm = 61.8320, GNorm = 1.2548, lr_0 = 2.3647e-04
Loss = 4.1843e-01, PNorm = 61.8386, GNorm = 1.3724, lr_0 = 2.3631e-04
Loss = 3.2506e-01, PNorm = 61.8429, GNorm = 1.7877, lr_0 = 2.3615e-04
Loss = 3.2109e-01, PNorm = 61.8429, GNorm = 1.6088, lr_0 = 2.3599e-04
Loss = 4.2183e-01, PNorm = 61.8468, GNorm = 1.6660, lr_0 = 2.3582e-04
Loss = 3.2374e-01, PNorm = 61.8512, GNorm = 1.3770, lr_0 = 2.3566e-04
Loss = 3.8449e-01, PNorm = 61.8543, GNorm = 1.5567, lr_0 = 2.3550e-04
Loss = 4.0437e-01, PNorm = 61.8612, GNorm = 1.2323, lr_0 = 2.3534e-04
Loss = 3.5606e-01, PNorm = 61.8651, GNorm = 1.6202, lr_0 = 2.3518e-04
Loss = 3.6640e-01, PNorm = 61.8671, GNorm = 1.3365, lr_0 = 2.3502e-04
Loss = 4.1673e-01, PNorm = 61.8702, GNorm = 1.2374, lr_0 = 2.3486e-04
Loss = 3.8524e-01, PNorm = 61.8758, GNorm = 1.6250, lr_0 = 2.3470e-04
Loss = 4.0854e-01, PNorm = 61.8804, GNorm = 1.7869, lr_0 = 2.3454e-04
Loss = 3.3451e-01, PNorm = 61.8808, GNorm = 1.5271, lr_0 = 2.3437e-04
Loss = 3.6477e-01, PNorm = 61.8845, GNorm = 1.4386, lr_0 = 2.3421e-04
Loss = 3.4571e-01, PNorm = 61.8863, GNorm = 1.3446, lr_0 = 2.3405e-04
Loss = 3.6669e-01, PNorm = 61.8893, GNorm = 1.5057, lr_0 = 2.3389e-04
Loss = 3.5307e-01, PNorm = 61.8914, GNorm = 1.2473, lr_0 = 2.3373e-04
Loss = 3.6143e-01, PNorm = 61.8926, GNorm = 1.7410, lr_0 = 2.3357e-04
Loss = 3.3653e-01, PNorm = 61.8929, GNorm = 1.1495, lr_0 = 2.3341e-04
Loss = 3.4647e-01, PNorm = 61.8965, GNorm = 1.7866, lr_0 = 2.3325e-04
Loss = 3.3465e-01, PNorm = 61.8978, GNorm = 1.1060, lr_0 = 2.3309e-04
Loss = 3.7207e-01, PNorm = 61.9027, GNorm = 1.2725, lr_0 = 2.3293e-04
Loss = 4.0777e-01, PNorm = 61.9074, GNorm = 1.9475, lr_0 = 2.3277e-04
Loss = 3.6630e-01, PNorm = 61.9102, GNorm = 1.0968, lr_0 = 2.3261e-04
Loss = 3.6842e-01, PNorm = 61.9129, GNorm = 1.6437, lr_0 = 2.3246e-04
Loss = 4.5217e-01, PNorm = 61.9142, GNorm = 1.8871, lr_0 = 2.3230e-04
Loss = 3.7923e-01, PNorm = 61.9148, GNorm = 1.2833, lr_0 = 2.3214e-04
Loss = 4.2853e-01, PNorm = 61.9168, GNorm = 1.7256, lr_0 = 2.3198e-04
Loss = 3.7556e-01, PNorm = 61.9168, GNorm = 1.3213, lr_0 = 2.3182e-04
Loss = 3.5864e-01, PNorm = 61.9186, GNorm = 1.1373, lr_0 = 2.3166e-04
Loss = 4.0270e-01, PNorm = 61.9225, GNorm = 1.4730, lr_0 = 2.3150e-04
Loss = 3.7397e-01, PNorm = 61.9263, GNorm = 1.6888, lr_0 = 2.3134e-04
Loss = 4.1261e-01, PNorm = 61.9280, GNorm = 1.9914, lr_0 = 2.3118e-04
Loss = 3.3550e-01, PNorm = 61.9301, GNorm = 1.4303, lr_0 = 2.3103e-04
Loss = 4.0640e-01, PNorm = 61.9363, GNorm = 2.1180, lr_0 = 2.3087e-04
Loss = 3.4889e-01, PNorm = 61.9402, GNorm = 0.9163, lr_0 = 2.3071e-04
Loss = 3.5242e-01, PNorm = 61.9416, GNorm = 1.4582, lr_0 = 2.3055e-04
Loss = 4.0805e-01, PNorm = 61.9490, GNorm = 1.7870, lr_0 = 2.3039e-04
Loss = 3.4742e-01, PNorm = 61.9534, GNorm = 1.1241, lr_0 = 2.3024e-04
Loss = 3.7763e-01, PNorm = 61.9539, GNorm = 1.7333, lr_0 = 2.3008e-04
Loss = 3.6688e-01, PNorm = 61.9542, GNorm = 1.1620, lr_0 = 2.2992e-04
Loss = 3.8127e-01, PNorm = 61.9540, GNorm = 1.2837, lr_0 = 2.2976e-04
Loss = 4.4868e-01, PNorm = 61.9597, GNorm = 1.5486, lr_0 = 2.2961e-04
Loss = 3.3827e-01, PNorm = 61.9621, GNorm = 1.3760, lr_0 = 2.2945e-04
Loss = 3.8405e-01, PNorm = 61.9626, GNorm = 1.1740, lr_0 = 2.2929e-04
Loss = 3.5189e-01, PNorm = 61.9676, GNorm = 1.6425, lr_0 = 2.2913e-04
Loss = 3.5799e-01, PNorm = 61.9723, GNorm = 1.7731, lr_0 = 2.2898e-04
Loss = 3.6304e-01, PNorm = 61.9722, GNorm = 1.2673, lr_0 = 2.2882e-04
Loss = 3.5947e-01, PNorm = 61.9731, GNorm = 1.7888, lr_0 = 2.2866e-04
Loss = 3.4981e-01, PNorm = 61.9755, GNorm = 1.5100, lr_0 = 2.2851e-04
Loss = 3.2121e-01, PNorm = 61.9786, GNorm = 1.2067, lr_0 = 2.2835e-04
Loss = 3.8916e-01, PNorm = 61.9820, GNorm = 1.8564, lr_0 = 2.2819e-04
Loss = 4.1527e-01, PNorm = 61.9856, GNorm = 1.9261, lr_0 = 2.2804e-04
Loss = 3.6983e-01, PNorm = 61.9892, GNorm = 1.2985, lr_0 = 2.2788e-04
Loss = 3.3978e-01, PNorm = 61.9910, GNorm = 1.4126, lr_0 = 2.2773e-04
Loss = 3.4548e-01, PNorm = 61.9951, GNorm = 1.7114, lr_0 = 2.2757e-04
Validation mae = 0.111999
Epoch 20
Loss = 3.3626e-01, PNorm = 61.9992, GNorm = 2.2309, lr_0 = 2.2741e-04
Loss = 3.6925e-01, PNorm = 62.0029, GNorm = 1.4699, lr_0 = 2.2726e-04
Loss = 3.4493e-01, PNorm = 62.0046, GNorm = 0.9521, lr_0 = 2.2710e-04
Loss = 3.3212e-01, PNorm = 62.0059, GNorm = 1.8280, lr_0 = 2.2695e-04
Loss = 3.9478e-01, PNorm = 62.0090, GNorm = 1.8198, lr_0 = 2.2679e-04
Loss = 3.5268e-01, PNorm = 62.0103, GNorm = 1.4074, lr_0 = 2.2664e-04
Loss = 3.8256e-01, PNorm = 62.0131, GNorm = 1.5297, lr_0 = 2.2648e-04
Loss = 3.5337e-01, PNorm = 62.0165, GNorm = 2.2352, lr_0 = 2.2632e-04
Loss = 3.4465e-01, PNorm = 62.0181, GNorm = 1.3799, lr_0 = 2.2617e-04
Loss = 3.6139e-01, PNorm = 62.0181, GNorm = 1.5715, lr_0 = 2.2601e-04
Loss = 3.2084e-01, PNorm = 62.0181, GNorm = 1.5941, lr_0 = 2.2586e-04
Loss = 4.2359e-01, PNorm = 62.0235, GNorm = 2.3960, lr_0 = 2.2571e-04
Loss = 3.3714e-01, PNorm = 62.0275, GNorm = 1.3938, lr_0 = 2.2555e-04
Loss = 3.6426e-01, PNorm = 62.0294, GNorm = 1.8366, lr_0 = 2.2540e-04
Loss = 3.9627e-01, PNorm = 62.0312, GNorm = 2.3784, lr_0 = 2.2524e-04
Loss = 3.2427e-01, PNorm = 62.0325, GNorm = 1.1414, lr_0 = 2.2509e-04
Loss = 3.5870e-01, PNorm = 62.0378, GNorm = 1.3442, lr_0 = 2.2493e-04
Loss = 4.0836e-01, PNorm = 62.0409, GNorm = 1.6014, lr_0 = 2.2478e-04
Loss = 3.8290e-01, PNorm = 62.0434, GNorm = 1.3353, lr_0 = 2.2463e-04
Loss = 3.3544e-01, PNorm = 62.0452, GNorm = 1.1769, lr_0 = 2.2447e-04
Loss = 3.5175e-01, PNorm = 62.0469, GNorm = 1.4985, lr_0 = 2.2432e-04
Loss = 3.5659e-01, PNorm = 62.0463, GNorm = 1.7782, lr_0 = 2.2416e-04
Loss = 3.3941e-01, PNorm = 62.0470, GNorm = 0.8871, lr_0 = 2.2401e-04
Loss = 3.1578e-01, PNorm = 62.0458, GNorm = 1.8526, lr_0 = 2.2386e-04
Loss = 3.0585e-01, PNorm = 62.0507, GNorm = 1.4119, lr_0 = 2.2370e-04
Loss = 3.2020e-01, PNorm = 62.0542, GNorm = 1.5847, lr_0 = 2.2355e-04
Loss = 3.5433e-01, PNorm = 62.0561, GNorm = 1.8553, lr_0 = 2.2340e-04
Loss = 3.6018e-01, PNorm = 62.0615, GNorm = 1.1803, lr_0 = 2.2324e-04
Loss = 3.5848e-01, PNorm = 62.0627, GNorm = 1.3098, lr_0 = 2.2309e-04
Loss = 3.5747e-01, PNorm = 62.0641, GNorm = 1.1669, lr_0 = 2.2294e-04
Loss = 3.3813e-01, PNorm = 62.0667, GNorm = 1.2133, lr_0 = 2.2279e-04
Loss = 3.4147e-01, PNorm = 62.0702, GNorm = 1.4594, lr_0 = 2.2263e-04
Loss = 3.7528e-01, PNorm = 62.0697, GNorm = 2.2682, lr_0 = 2.2248e-04
Loss = 3.5843e-01, PNorm = 62.0745, GNorm = 1.9353, lr_0 = 2.2233e-04
Loss = 3.3629e-01, PNorm = 62.0792, GNorm = 1.2168, lr_0 = 2.2218e-04
Loss = 3.2502e-01, PNorm = 62.0803, GNorm = 1.3621, lr_0 = 2.2202e-04
Loss = 3.3698e-01, PNorm = 62.0865, GNorm = 1.3291, lr_0 = 2.2187e-04
Loss = 3.3335e-01, PNorm = 62.0922, GNorm = 1.5276, lr_0 = 2.2172e-04
Loss = 3.5334e-01, PNorm = 62.0944, GNorm = 1.5857, lr_0 = 2.2157e-04
Loss = 3.3297e-01, PNorm = 62.0969, GNorm = 1.1165, lr_0 = 2.2142e-04
Loss = 3.3532e-01, PNorm = 62.0987, GNorm = 0.9790, lr_0 = 2.2126e-04
Loss = 3.4573e-01, PNorm = 62.0996, GNorm = 1.3548, lr_0 = 2.2111e-04
Loss = 2.9790e-01, PNorm = 62.1002, GNorm = 1.1111, lr_0 = 2.2096e-04
Loss = 3.6281e-01, PNorm = 62.1015, GNorm = 1.6792, lr_0 = 2.2081e-04
Loss = 3.4245e-01, PNorm = 62.1019, GNorm = 1.2154, lr_0 = 2.2066e-04
Loss = 3.4196e-01, PNorm = 62.1035, GNorm = 1.5074, lr_0 = 2.2051e-04
Loss = 3.8832e-01, PNorm = 62.1070, GNorm = 1.5425, lr_0 = 2.2036e-04
Loss = 3.1713e-01, PNorm = 62.1089, GNorm = 1.6930, lr_0 = 2.2021e-04
Loss = 4.1117e-01, PNorm = 62.1091, GNorm = 2.0279, lr_0 = 2.2005e-04
Loss = 3.5015e-01, PNorm = 62.1120, GNorm = 1.1919, lr_0 = 2.1990e-04
Loss = 4.0546e-01, PNorm = 62.1137, GNorm = 1.2213, lr_0 = 2.1975e-04
Loss = 3.3732e-01, PNorm = 62.1185, GNorm = 1.6033, lr_0 = 2.1960e-04
Loss = 3.9061e-01, PNorm = 62.1217, GNorm = 1.5862, lr_0 = 2.1945e-04
Loss = 4.0600e-01, PNorm = 62.1266, GNorm = 1.2632, lr_0 = 2.1930e-04
Loss = 3.5423e-01, PNorm = 62.1271, GNorm = 1.1818, lr_0 = 2.1915e-04
Loss = 3.8425e-01, PNorm = 62.1286, GNorm = 1.2532, lr_0 = 2.1900e-04
Loss = 4.1405e-01, PNorm = 62.1335, GNorm = 2.6116, lr_0 = 2.1885e-04
Loss = 3.6071e-01, PNorm = 62.1398, GNorm = 2.1574, lr_0 = 2.1870e-04
Loss = 3.4397e-01, PNorm = 62.1403, GNorm = 1.4162, lr_0 = 2.1855e-04
Loss = 3.6686e-01, PNorm = 62.1436, GNorm = 1.6727, lr_0 = 2.1840e-04
Loss = 3.8460e-01, PNorm = 62.1470, GNorm = 1.2636, lr_0 = 2.1825e-04
Loss = 3.6909e-01, PNorm = 62.1495, GNorm = 1.6429, lr_0 = 2.1810e-04
Loss = 3.5080e-01, PNorm = 62.1517, GNorm = 1.6677, lr_0 = 2.1795e-04
Loss = 3.9494e-01, PNorm = 62.1503, GNorm = 1.5090, lr_0 = 2.1780e-04
Loss = 3.7721e-01, PNorm = 62.1538, GNorm = 1.7920, lr_0 = 2.1765e-04
Loss = 3.6441e-01, PNorm = 62.1580, GNorm = 1.4295, lr_0 = 2.1751e-04
Loss = 3.2636e-01, PNorm = 62.1603, GNorm = 1.5536, lr_0 = 2.1736e-04
Loss = 3.6643e-01, PNorm = 62.1649, GNorm = 1.5926, lr_0 = 2.1721e-04
Loss = 3.6904e-01, PNorm = 62.1688, GNorm = 1.3767, lr_0 = 2.1706e-04
Loss = 3.4323e-01, PNorm = 62.1718, GNorm = 1.4552, lr_0 = 2.1691e-04
Loss = 3.5275e-01, PNorm = 62.1754, GNorm = 1.2874, lr_0 = 2.1676e-04
Loss = 3.1951e-01, PNorm = 62.1736, GNorm = 1.2179, lr_0 = 2.1661e-04
Loss = 4.3502e-01, PNorm = 62.1692, GNorm = 1.7632, lr_0 = 2.1646e-04
Loss = 3.7866e-01, PNorm = 62.1745, GNorm = 1.0947, lr_0 = 2.1632e-04
Loss = 3.5084e-01, PNorm = 62.1801, GNorm = 2.0492, lr_0 = 2.1617e-04
Loss = 4.9399e-01, PNorm = 62.1804, GNorm = 1.2853, lr_0 = 2.1602e-04
Loss = 3.6560e-01, PNorm = 62.1864, GNorm = 1.3696, lr_0 = 2.1587e-04
Loss = 4.0146e-01, PNorm = 62.1900, GNorm = 1.6406, lr_0 = 2.1572e-04
Loss = 3.6089e-01, PNorm = 62.1925, GNorm = 1.7154, lr_0 = 2.1558e-04
Loss = 3.3410e-01, PNorm = 62.1964, GNorm = 1.2163, lr_0 = 2.1543e-04
Loss = 3.4898e-01, PNorm = 62.1981, GNorm = 1.3374, lr_0 = 2.1528e-04
Loss = 4.9240e-01, PNorm = 62.1995, GNorm = 1.8003, lr_0 = 2.1513e-04
Loss = 3.8313e-01, PNorm = 62.2003, GNorm = 1.0674, lr_0 = 2.1499e-04
Loss = 3.4456e-01, PNorm = 62.2020, GNorm = 1.2888, lr_0 = 2.1484e-04
Loss = 4.0127e-01, PNorm = 62.2054, GNorm = 1.5347, lr_0 = 2.1469e-04
Loss = 3.3083e-01, PNorm = 62.2114, GNorm = 1.8255, lr_0 = 2.1454e-04
Loss = 3.9899e-01, PNorm = 62.2117, GNorm = 1.4604, lr_0 = 2.1440e-04
Loss = 3.6800e-01, PNorm = 62.2137, GNorm = 1.2192, lr_0 = 2.1425e-04
Loss = 4.0777e-01, PNorm = 62.2154, GNorm = 1.3455, lr_0 = 2.1410e-04
Loss = 3.7396e-01, PNorm = 62.2193, GNorm = 1.4844, lr_0 = 2.1396e-04
Loss = 3.1673e-01, PNorm = 62.2216, GNorm = 1.2545, lr_0 = 2.1381e-04
Loss = 3.3296e-01, PNorm = 62.2237, GNorm = 1.0703, lr_0 = 2.1366e-04
Loss = 3.6717e-01, PNorm = 62.2257, GNorm = 1.6303, lr_0 = 2.1352e-04
Loss = 3.9555e-01, PNorm = 62.2291, GNorm = 1.5780, lr_0 = 2.1337e-04
Loss = 3.5558e-01, PNorm = 62.2310, GNorm = 1.3197, lr_0 = 2.1323e-04
Loss = 4.2035e-01, PNorm = 62.2336, GNorm = 2.0864, lr_0 = 2.1308e-04
Loss = 4.0297e-01, PNorm = 62.2352, GNorm = 2.3130, lr_0 = 2.1293e-04
Loss = 3.7424e-01, PNorm = 62.2388, GNorm = 1.3527, lr_0 = 2.1279e-04
Loss = 3.0771e-01, PNorm = 62.2407, GNorm = 1.3452, lr_0 = 2.1264e-04
Loss = 4.4027e-01, PNorm = 62.2403, GNorm = 1.3509, lr_0 = 2.1250e-04
Loss = 3.3198e-01, PNorm = 62.2436, GNorm = 1.2098, lr_0 = 2.1235e-04
Loss = 4.0166e-01, PNorm = 62.2455, GNorm = 1.7042, lr_0 = 2.1221e-04
Loss = 4.0371e-01, PNorm = 62.2469, GNorm = 1.5436, lr_0 = 2.1206e-04
Loss = 3.5940e-01, PNorm = 62.2509, GNorm = 1.1116, lr_0 = 2.1191e-04
Loss = 3.4525e-01, PNorm = 62.2556, GNorm = 1.2844, lr_0 = 2.1177e-04
Loss = 3.0796e-01, PNorm = 62.2601, GNorm = 1.6593, lr_0 = 2.1162e-04
Loss = 3.8920e-01, PNorm = 62.2618, GNorm = 1.8864, lr_0 = 2.1148e-04
Loss = 3.2419e-01, PNorm = 62.2646, GNorm = 1.4865, lr_0 = 2.1133e-04
Loss = 3.7351e-01, PNorm = 62.2673, GNorm = 1.3823, lr_0 = 2.1119e-04
Loss = 3.5295e-01, PNorm = 62.2683, GNorm = 1.8431, lr_0 = 2.1104e-04
Loss = 3.8368e-01, PNorm = 62.2686, GNorm = 1.1037, lr_0 = 2.1090e-04
Loss = 3.8420e-01, PNorm = 62.2713, GNorm = 1.2122, lr_0 = 2.1076e-04
Loss = 3.6660e-01, PNorm = 62.2730, GNorm = 1.4103, lr_0 = 2.1061e-04
Loss = 3.8326e-01, PNorm = 62.2752, GNorm = 1.7115, lr_0 = 2.1047e-04
Loss = 3.6483e-01, PNorm = 62.2763, GNorm = 1.7904, lr_0 = 2.1032e-04
Loss = 3.6909e-01, PNorm = 62.2799, GNorm = 1.5580, lr_0 = 2.1018e-04
Loss = 3.7696e-01, PNorm = 62.2844, GNorm = 1.4793, lr_0 = 2.1003e-04
Loss = 3.6583e-01, PNorm = 62.2865, GNorm = 1.1995, lr_0 = 2.0989e-04
Loss = 3.3573e-01, PNorm = 62.2882, GNorm = 1.1277, lr_0 = 2.0975e-04
Loss = 3.7463e-01, PNorm = 62.2914, GNorm = 1.3380, lr_0 = 2.0960e-04
Validation mae = 0.113730
Epoch 21
Loss = 3.9156e-01, PNorm = 62.2986, GNorm = 1.6478, lr_0 = 2.0946e-04
Loss = 3.4206e-01, PNorm = 62.3043, GNorm = 1.4465, lr_0 = 2.0932e-04
Loss = 3.5145e-01, PNorm = 62.3052, GNorm = 1.5394, lr_0 = 2.0917e-04
Loss = 3.6017e-01, PNorm = 62.3034, GNorm = 1.2399, lr_0 = 2.0903e-04
Loss = 3.6218e-01, PNorm = 62.3067, GNorm = 1.0878, lr_0 = 2.0889e-04
Loss = 3.6103e-01, PNorm = 62.3095, GNorm = 1.0779, lr_0 = 2.0874e-04
Loss = 3.2451e-01, PNorm = 62.3103, GNorm = 1.1057, lr_0 = 2.0860e-04
Loss = 3.8654e-01, PNorm = 62.3117, GNorm = 1.6095, lr_0 = 2.0846e-04
Loss = 3.4537e-01, PNorm = 62.3173, GNorm = 1.5996, lr_0 = 2.0831e-04
Loss = 3.2171e-01, PNorm = 62.3184, GNorm = 1.5918, lr_0 = 2.0817e-04
Loss = 3.5674e-01, PNorm = 62.3192, GNorm = 1.7714, lr_0 = 2.0803e-04
Loss = 3.9839e-01, PNorm = 62.3217, GNorm = 1.3517, lr_0 = 2.0789e-04
Loss = 3.6375e-01, PNorm = 62.3233, GNorm = 1.0873, lr_0 = 2.0774e-04
Loss = 3.4957e-01, PNorm = 62.3249, GNorm = 0.7645, lr_0 = 2.0760e-04
Loss = 3.9233e-01, PNorm = 62.3275, GNorm = 1.1233, lr_0 = 2.0746e-04
Loss = 3.7028e-01, PNorm = 62.3300, GNorm = 1.4629, lr_0 = 2.0732e-04
Loss = 3.6308e-01, PNorm = 62.3366, GNorm = 1.2671, lr_0 = 2.0718e-04
Loss = 3.7954e-01, PNorm = 62.3416, GNorm = 1.1466, lr_0 = 2.0703e-04
Loss = 3.8592e-01, PNorm = 62.3400, GNorm = 1.6913, lr_0 = 2.0689e-04
Loss = 3.8413e-01, PNorm = 62.3419, GNorm = 2.2200, lr_0 = 2.0675e-04
Loss = 3.6999e-01, PNorm = 62.3420, GNorm = 1.9312, lr_0 = 2.0661e-04
Loss = 3.9053e-01, PNorm = 62.3440, GNorm = 1.7125, lr_0 = 2.0647e-04
Loss = 3.3749e-01, PNorm = 62.3466, GNorm = 1.6268, lr_0 = 2.0633e-04
Loss = 3.1851e-01, PNorm = 62.3515, GNorm = 1.0225, lr_0 = 2.0618e-04
Loss = 3.8050e-01, PNorm = 62.3517, GNorm = 1.9497, lr_0 = 2.0604e-04
Loss = 3.3482e-01, PNorm = 62.3528, GNorm = 1.4764, lr_0 = 2.0590e-04
Loss = 4.0416e-01, PNorm = 62.3576, GNorm = 1.3708, lr_0 = 2.0576e-04
Loss = 3.1136e-01, PNorm = 62.3631, GNorm = 1.3500, lr_0 = 2.0562e-04
Loss = 3.7577e-01, PNorm = 62.3648, GNorm = 1.2981, lr_0 = 2.0548e-04
Loss = 3.6880e-01, PNorm = 62.3644, GNorm = 1.7863, lr_0 = 2.0534e-04
Loss = 3.8701e-01, PNorm = 62.3643, GNorm = 1.5820, lr_0 = 2.0520e-04
Loss = 3.4302e-01, PNorm = 62.3644, GNorm = 1.4247, lr_0 = 2.0506e-04
Loss = 3.7259e-01, PNorm = 62.3682, GNorm = 1.3689, lr_0 = 2.0492e-04
Loss = 3.6068e-01, PNorm = 62.3725, GNorm = 1.2826, lr_0 = 2.0478e-04
Loss = 3.7900e-01, PNorm = 62.3758, GNorm = 1.3685, lr_0 = 2.0464e-04
Loss = 3.5934e-01, PNorm = 62.3763, GNorm = 1.7069, lr_0 = 2.0450e-04
Loss = 3.5060e-01, PNorm = 62.3794, GNorm = 1.9600, lr_0 = 2.0436e-04
Loss = 3.7226e-01, PNorm = 62.3816, GNorm = 1.3318, lr_0 = 2.0422e-04
Loss = 4.0421e-01, PNorm = 62.3836, GNorm = 1.2790, lr_0 = 2.0408e-04
Loss = 4.0170e-01, PNorm = 62.3866, GNorm = 2.0716, lr_0 = 2.0394e-04
Loss = 3.7019e-01, PNorm = 62.3908, GNorm = 1.1757, lr_0 = 2.0380e-04
Loss = 4.0548e-01, PNorm = 62.3898, GNorm = 1.4555, lr_0 = 2.0366e-04
Loss = 3.7525e-01, PNorm = 62.3926, GNorm = 1.4715, lr_0 = 2.0352e-04
Loss = 3.3181e-01, PNorm = 62.3951, GNorm = 1.8499, lr_0 = 2.0338e-04
Loss = 3.6244e-01, PNorm = 62.3986, GNorm = 1.8270, lr_0 = 2.0324e-04
Loss = 3.7846e-01, PNorm = 62.4029, GNorm = 1.0987, lr_0 = 2.0310e-04
Loss = 3.5839e-01, PNorm = 62.4040, GNorm = 1.4116, lr_0 = 2.0296e-04
Loss = 3.0443e-01, PNorm = 62.4074, GNorm = 1.3309, lr_0 = 2.0282e-04
Loss = 3.2136e-01, PNorm = 62.4084, GNorm = 1.1556, lr_0 = 2.0268e-04
Loss = 3.3551e-01, PNorm = 62.4095, GNorm = 1.1989, lr_0 = 2.0254e-04
Loss = 3.3337e-01, PNorm = 62.4131, GNorm = 1.0657, lr_0 = 2.0240e-04
Loss = 3.9769e-01, PNorm = 62.4162, GNorm = 1.2342, lr_0 = 2.0227e-04
Loss = 3.0995e-01, PNorm = 62.4170, GNorm = 1.1902, lr_0 = 2.0213e-04
Loss = 3.8833e-01, PNorm = 62.4205, GNorm = 1.9777, lr_0 = 2.0199e-04
Loss = 3.3947e-01, PNorm = 62.4198, GNorm = 1.1265, lr_0 = 2.0185e-04
Loss = 3.3311e-01, PNorm = 62.4242, GNorm = 1.4637, lr_0 = 2.0171e-04
Loss = 3.6283e-01, PNorm = 62.4285, GNorm = 1.7157, lr_0 = 2.0157e-04
Loss = 3.9865e-01, PNorm = 62.4307, GNorm = 1.3994, lr_0 = 2.0144e-04
Loss = 3.2177e-01, PNorm = 62.4314, GNorm = 1.1745, lr_0 = 2.0130e-04
Loss = 3.5939e-01, PNorm = 62.4336, GNorm = 1.1348, lr_0 = 2.0116e-04
Loss = 3.7968e-01, PNorm = 62.4337, GNorm = 1.1339, lr_0 = 2.0102e-04
Loss = 3.7062e-01, PNorm = 62.4363, GNorm = 1.4690, lr_0 = 2.0088e-04
Loss = 3.7939e-01, PNorm = 62.4412, GNorm = 1.2496, lr_0 = 2.0075e-04
Loss = 3.5878e-01, PNorm = 62.4422, GNorm = 1.4391, lr_0 = 2.0061e-04
Loss = 3.9231e-01, PNorm = 62.4423, GNorm = 1.2082, lr_0 = 2.0047e-04
Loss = 3.5629e-01, PNorm = 62.4467, GNorm = 1.4832, lr_0 = 2.0033e-04
Loss = 3.8247e-01, PNorm = 62.4491, GNorm = 1.6004, lr_0 = 2.0020e-04
Loss = 3.8336e-01, PNorm = 62.4518, GNorm = 1.3898, lr_0 = 2.0006e-04
Loss = 3.5506e-01, PNorm = 62.4565, GNorm = 1.7987, lr_0 = 1.9992e-04
Loss = 3.5235e-01, PNorm = 62.4598, GNorm = 1.3385, lr_0 = 1.9979e-04
Loss = 3.4436e-01, PNorm = 62.4631, GNorm = 1.0649, lr_0 = 1.9965e-04
Loss = 3.6394e-01, PNorm = 62.4652, GNorm = 1.4119, lr_0 = 1.9951e-04
Loss = 3.6920e-01, PNorm = 62.4693, GNorm = 1.4767, lr_0 = 1.9938e-04
Loss = 3.0620e-01, PNorm = 62.4668, GNorm = 0.9425, lr_0 = 1.9924e-04
Loss = 4.1280e-01, PNorm = 62.4682, GNorm = 1.4630, lr_0 = 1.9910e-04
Loss = 3.5956e-01, PNorm = 62.4721, GNorm = 1.6491, lr_0 = 1.9897e-04
Loss = 3.5088e-01, PNorm = 62.4730, GNorm = 1.2548, lr_0 = 1.9883e-04
Loss = 3.3897e-01, PNorm = 62.4720, GNorm = 1.0860, lr_0 = 1.9869e-04
Loss = 3.6818e-01, PNorm = 62.4757, GNorm = 1.4916, lr_0 = 1.9856e-04
Loss = 3.2082e-01, PNorm = 62.4785, GNorm = 1.1471, lr_0 = 1.9842e-04
Loss = 3.6600e-01, PNorm = 62.4790, GNorm = 1.8063, lr_0 = 1.9829e-04
Loss = 3.1611e-01, PNorm = 62.4807, GNorm = 1.6020, lr_0 = 1.9815e-04
Loss = 3.7860e-01, PNorm = 62.4805, GNorm = 1.6009, lr_0 = 1.9801e-04
Loss = 3.5318e-01, PNorm = 62.4810, GNorm = 1.3599, lr_0 = 1.9788e-04
Loss = 3.5132e-01, PNorm = 62.4808, GNorm = 1.1438, lr_0 = 1.9774e-04
Loss = 3.5270e-01, PNorm = 62.4848, GNorm = 1.5583, lr_0 = 1.9761e-04
Loss = 3.8415e-01, PNorm = 62.4864, GNorm = 1.0601, lr_0 = 1.9747e-04
Loss = 3.8622e-01, PNorm = 62.4883, GNorm = 1.3693, lr_0 = 1.9734e-04
Loss = 3.7247e-01, PNorm = 62.4920, GNorm = 1.1107, lr_0 = 1.9720e-04
Loss = 3.6552e-01, PNorm = 62.4912, GNorm = 1.2017, lr_0 = 1.9707e-04
Loss = 3.4702e-01, PNorm = 62.4913, GNorm = 1.3051, lr_0 = 1.9693e-04
Loss = 3.7207e-01, PNorm = 62.4937, GNorm = 1.4560, lr_0 = 1.9680e-04
Loss = 4.0727e-01, PNorm = 62.4948, GNorm = 1.5029, lr_0 = 1.9666e-04
Loss = 3.6194e-01, PNorm = 62.4969, GNorm = 1.5409, lr_0 = 1.9653e-04
Loss = 3.9601e-01, PNorm = 62.4991, GNorm = 1.5118, lr_0 = 1.9639e-04
Loss = 3.9482e-01, PNorm = 62.5010, GNorm = 1.3230, lr_0 = 1.9626e-04
Loss = 3.1873e-01, PNorm = 62.5016, GNorm = 1.1115, lr_0 = 1.9612e-04
Loss = 3.5641e-01, PNorm = 62.5018, GNorm = 1.1188, lr_0 = 1.9599e-04
Loss = 3.1691e-01, PNorm = 62.5049, GNorm = 1.9212, lr_0 = 1.9585e-04
Loss = 3.3157e-01, PNorm = 62.5037, GNorm = 1.6001, lr_0 = 1.9572e-04
Loss = 3.2161e-01, PNorm = 62.5068, GNorm = 1.7100, lr_0 = 1.9559e-04
Loss = 3.3392e-01, PNorm = 62.5096, GNorm = 1.4971, lr_0 = 1.9545e-04
Loss = 3.7523e-01, PNorm = 62.5101, GNorm = 1.5292, lr_0 = 1.9532e-04
Loss = 4.1405e-01, PNorm = 62.5119, GNorm = 1.4933, lr_0 = 1.9518e-04
Loss = 3.8450e-01, PNorm = 62.5148, GNorm = 1.8164, lr_0 = 1.9505e-04
Loss = 3.4262e-01, PNorm = 62.5175, GNorm = 1.2094, lr_0 = 1.9492e-04
Loss = 3.7102e-01, PNorm = 62.5186, GNorm = 1.4544, lr_0 = 1.9478e-04
Loss = 3.3414e-01, PNorm = 62.5213, GNorm = 1.2414, lr_0 = 1.9465e-04
Loss = 3.8730e-01, PNorm = 62.5223, GNorm = 1.8815, lr_0 = 1.9452e-04
Loss = 3.2115e-01, PNorm = 62.5239, GNorm = 1.6193, lr_0 = 1.9438e-04
Loss = 3.5241e-01, PNorm = 62.5233, GNorm = 1.3303, lr_0 = 1.9425e-04
Loss = 3.7390e-01, PNorm = 62.5258, GNorm = 1.5264, lr_0 = 1.9412e-04
Loss = 3.8899e-01, PNorm = 62.5308, GNorm = 1.9196, lr_0 = 1.9398e-04
Loss = 3.3243e-01, PNorm = 62.5318, GNorm = 1.6270, lr_0 = 1.9385e-04
Loss = 3.4278e-01, PNorm = 62.5332, GNorm = 1.0733, lr_0 = 1.9372e-04
Loss = 3.6628e-01, PNorm = 62.5365, GNorm = 1.7807, lr_0 = 1.9359e-04
Loss = 3.8620e-01, PNorm = 62.5375, GNorm = 1.7718, lr_0 = 1.9345e-04
Loss = 3.4774e-01, PNorm = 62.5383, GNorm = 1.3263, lr_0 = 1.9332e-04
Loss = 3.6481e-01, PNorm = 62.5383, GNorm = 1.0902, lr_0 = 1.9319e-04
Loss = 3.6695e-01, PNorm = 62.5393, GNorm = 1.4923, lr_0 = 1.9306e-04
Validation mae = 0.111304
Epoch 22
Loss = 3.3552e-01, PNorm = 62.5417, GNorm = 1.3850, lr_0 = 1.9292e-04
Loss = 3.8665e-01, PNorm = 62.5433, GNorm = 1.5735, lr_0 = 1.9279e-04
Loss = 3.4791e-01, PNorm = 62.5460, GNorm = 1.4802, lr_0 = 1.9266e-04
Loss = 3.5937e-01, PNorm = 62.5487, GNorm = 1.3907, lr_0 = 1.9253e-04
Loss = 3.5933e-01, PNorm = 62.5491, GNorm = 1.3863, lr_0 = 1.9240e-04
Loss = 3.7018e-01, PNorm = 62.5520, GNorm = 1.2828, lr_0 = 1.9226e-04
Loss = 3.6864e-01, PNorm = 62.5543, GNorm = 2.4414, lr_0 = 1.9213e-04
Loss = 3.1697e-01, PNorm = 62.5561, GNorm = 1.4001, lr_0 = 1.9200e-04
Loss = 3.7471e-01, PNorm = 62.5567, GNorm = 1.5545, lr_0 = 1.9187e-04
Loss = 3.4807e-01, PNorm = 62.5579, GNorm = 1.4578, lr_0 = 1.9174e-04
Loss = 3.3131e-01, PNorm = 62.5598, GNorm = 1.6448, lr_0 = 1.9161e-04
Loss = 3.9535e-01, PNorm = 62.5611, GNorm = 1.4403, lr_0 = 1.9148e-04
Loss = 3.6030e-01, PNorm = 62.5647, GNorm = 1.2289, lr_0 = 1.9134e-04
Loss = 3.5865e-01, PNorm = 62.5669, GNorm = 1.6587, lr_0 = 1.9121e-04
Loss = 3.1467e-01, PNorm = 62.5657, GNorm = 1.7160, lr_0 = 1.9108e-04
Loss = 3.7361e-01, PNorm = 62.5668, GNorm = 1.4719, lr_0 = 1.9095e-04
Loss = 3.4152e-01, PNorm = 62.5713, GNorm = 1.1059, lr_0 = 1.9082e-04
Loss = 3.1797e-01, PNorm = 62.5725, GNorm = 1.1724, lr_0 = 1.9069e-04
Loss = 3.6093e-01, PNorm = 62.5722, GNorm = 1.2296, lr_0 = 1.9056e-04
Loss = 3.4571e-01, PNorm = 62.5742, GNorm = 1.8670, lr_0 = 1.9043e-04
Loss = 3.9458e-01, PNorm = 62.5732, GNorm = 1.7473, lr_0 = 1.9030e-04
Loss = 3.4591e-01, PNorm = 62.5744, GNorm = 1.3553, lr_0 = 1.9017e-04
Loss = 3.9730e-01, PNorm = 62.5725, GNorm = 1.4553, lr_0 = 1.9004e-04
Loss = 4.0236e-01, PNorm = 62.5740, GNorm = 1.3416, lr_0 = 1.8991e-04
Loss = 4.0963e-01, PNorm = 62.5775, GNorm = 2.0389, lr_0 = 1.8978e-04
Loss = 3.5455e-01, PNorm = 62.5818, GNorm = 1.1747, lr_0 = 1.8965e-04
Loss = 3.3902e-01, PNorm = 62.5850, GNorm = 1.6101, lr_0 = 1.8952e-04
Loss = 3.3653e-01, PNorm = 62.5870, GNorm = 1.4328, lr_0 = 1.8939e-04
Loss = 3.1283e-01, PNorm = 62.5881, GNorm = 1.1477, lr_0 = 1.8926e-04
Loss = 3.3328e-01, PNorm = 62.5880, GNorm = 1.4254, lr_0 = 1.8913e-04
Loss = 4.8804e-01, PNorm = 62.5913, GNorm = 1.8734, lr_0 = 1.8900e-04
Loss = 3.2581e-01, PNorm = 62.5927, GNorm = 1.5156, lr_0 = 1.8887e-04
Loss = 3.4026e-01, PNorm = 62.5957, GNorm = 1.0248, lr_0 = 1.8874e-04
Loss = 4.1371e-01, PNorm = 62.6007, GNorm = 1.4655, lr_0 = 1.8861e-04
Loss = 3.5419e-01, PNorm = 62.6044, GNorm = 1.2275, lr_0 = 1.8848e-04
Loss = 3.4943e-01, PNorm = 62.6079, GNorm = 1.2262, lr_0 = 1.8835e-04
Loss = 3.7418e-01, PNorm = 62.6087, GNorm = 1.7894, lr_0 = 1.8822e-04
Loss = 4.2283e-01, PNorm = 62.6061, GNorm = 1.6849, lr_0 = 1.8809e-04
Loss = 3.8014e-01, PNorm = 62.6079, GNorm = 1.3798, lr_0 = 1.8797e-04
Loss = 3.3867e-01, PNorm = 62.6105, GNorm = 1.1176, lr_0 = 1.8784e-04
Loss = 3.7248e-01, PNorm = 62.6128, GNorm = 1.5392, lr_0 = 1.8771e-04
Loss = 4.0901e-01, PNorm = 62.6122, GNorm = 1.5652, lr_0 = 1.8758e-04
Loss = 3.4893e-01, PNorm = 62.6154, GNorm = 1.1534, lr_0 = 1.8745e-04
Loss = 3.1014e-01, PNorm = 62.6194, GNorm = 1.3602, lr_0 = 1.8732e-04
Loss = 3.8269e-01, PNorm = 62.6211, GNorm = 1.3624, lr_0 = 1.8719e-04
Loss = 3.7429e-01, PNorm = 62.6240, GNorm = 1.3281, lr_0 = 1.8707e-04
Loss = 3.3626e-01, PNorm = 62.6278, GNorm = 1.8577, lr_0 = 1.8694e-04
Loss = 3.3848e-01, PNorm = 62.6272, GNorm = 1.5709, lr_0 = 1.8681e-04
Loss = 3.4833e-01, PNorm = 62.6263, GNorm = 1.6460, lr_0 = 1.8668e-04
Loss = 3.1459e-01, PNorm = 62.6293, GNorm = 1.2409, lr_0 = 1.8655e-04
Loss = 2.9854e-01, PNorm = 62.6316, GNorm = 0.9943, lr_0 = 1.8643e-04
Loss = 3.8720e-01, PNorm = 62.6336, GNorm = 1.4672, lr_0 = 1.8630e-04
Loss = 3.7902e-01, PNorm = 62.6393, GNorm = 1.4794, lr_0 = 1.8617e-04
Loss = 3.5722e-01, PNorm = 62.6411, GNorm = 1.7129, lr_0 = 1.8604e-04
Loss = 3.5283e-01, PNorm = 62.6439, GNorm = 1.4235, lr_0 = 1.8592e-04
Loss = 3.6528e-01, PNorm = 62.6471, GNorm = 1.3447, lr_0 = 1.8579e-04
Loss = 3.4812e-01, PNorm = 62.6510, GNorm = 1.5769, lr_0 = 1.8566e-04
Loss = 3.7877e-01, PNorm = 62.6544, GNorm = 1.3159, lr_0 = 1.8553e-04
Loss = 3.2718e-01, PNorm = 62.6565, GNorm = 1.4875, lr_0 = 1.8541e-04
Loss = 3.2121e-01, PNorm = 62.6600, GNorm = 1.2243, lr_0 = 1.8528e-04
Loss = 3.7626e-01, PNorm = 62.6615, GNorm = 1.1290, lr_0 = 1.8515e-04
Loss = 3.5221e-01, PNorm = 62.6609, GNorm = 0.9743, lr_0 = 1.8503e-04
Loss = 3.4293e-01, PNorm = 62.6655, GNorm = 1.2049, lr_0 = 1.8490e-04
Loss = 3.3057e-01, PNorm = 62.6695, GNorm = 1.3885, lr_0 = 1.8477e-04
Loss = 4.0791e-01, PNorm = 62.6685, GNorm = 2.5694, lr_0 = 1.8465e-04
Loss = 3.5757e-01, PNorm = 62.6709, GNorm = 1.1987, lr_0 = 1.8452e-04
Loss = 3.2596e-01, PNorm = 62.6739, GNorm = 1.4405, lr_0 = 1.8439e-04
Loss = 3.6890e-01, PNorm = 62.6752, GNorm = 2.2209, lr_0 = 1.8427e-04
Loss = 3.7410e-01, PNorm = 62.6786, GNorm = 1.3546, lr_0 = 1.8414e-04
Loss = 3.4068e-01, PNorm = 62.6780, GNorm = 1.7566, lr_0 = 1.8401e-04
Loss = 3.0509e-01, PNorm = 62.6786, GNorm = 1.4352, lr_0 = 1.8389e-04
Loss = 3.3384e-01, PNorm = 62.6809, GNorm = 1.5042, lr_0 = 1.8376e-04
Loss = 3.6667e-01, PNorm = 62.6820, GNorm = 1.9150, lr_0 = 1.8364e-04
Loss = 3.2522e-01, PNorm = 62.6853, GNorm = 1.3562, lr_0 = 1.8351e-04
Loss = 3.4281e-01, PNorm = 62.6847, GNorm = 1.2779, lr_0 = 1.8338e-04
Loss = 3.4572e-01, PNorm = 62.6856, GNorm = 1.8911, lr_0 = 1.8326e-04
Loss = 3.0463e-01, PNorm = 62.6867, GNorm = 1.6241, lr_0 = 1.8313e-04
Loss = 3.4187e-01, PNorm = 62.6903, GNorm = 1.1702, lr_0 = 1.8301e-04
Loss = 3.3853e-01, PNorm = 62.6938, GNorm = 1.2316, lr_0 = 1.8288e-04
Loss = 3.6245e-01, PNorm = 62.6998, GNorm = 1.7352, lr_0 = 1.8276e-04
Loss = 4.0528e-01, PNorm = 62.7028, GNorm = 1.8639, lr_0 = 1.8263e-04
Loss = 3.7386e-01, PNorm = 62.7040, GNorm = 2.2492, lr_0 = 1.8251e-04
Loss = 4.0986e-01, PNorm = 62.7079, GNorm = 1.1146, lr_0 = 1.8238e-04
Loss = 3.5839e-01, PNorm = 62.7085, GNorm = 1.4015, lr_0 = 1.8226e-04
Loss = 4.3229e-01, PNorm = 62.7112, GNorm = 1.6129, lr_0 = 1.8213e-04
Loss = 3.9463e-01, PNorm = 62.7149, GNorm = 2.2659, lr_0 = 1.8201e-04
Loss = 3.4315e-01, PNorm = 62.7170, GNorm = 1.7930, lr_0 = 1.8188e-04
Loss = 3.5317e-01, PNorm = 62.7181, GNorm = 1.1943, lr_0 = 1.8176e-04
Loss = 3.3319e-01, PNorm = 62.7210, GNorm = 1.4998, lr_0 = 1.8163e-04
Loss = 3.2147e-01, PNorm = 62.7224, GNorm = 1.1179, lr_0 = 1.8151e-04
Loss = 3.1725e-01, PNorm = 62.7230, GNorm = 2.3174, lr_0 = 1.8138e-04
Loss = 4.1687e-01, PNorm = 62.7249, GNorm = 1.9386, lr_0 = 1.8126e-04
Loss = 3.6561e-01, PNorm = 62.7286, GNorm = 1.3350, lr_0 = 1.8114e-04
Loss = 3.5892e-01, PNorm = 62.7285, GNorm = 1.3107, lr_0 = 1.8101e-04
Loss = 3.7726e-01, PNorm = 62.7302, GNorm = 1.4488, lr_0 = 1.8089e-04
Loss = 3.6324e-01, PNorm = 62.7330, GNorm = 1.2471, lr_0 = 1.8076e-04
Loss = 3.5090e-01, PNorm = 62.7338, GNorm = 2.1486, lr_0 = 1.8064e-04
Loss = 3.6301e-01, PNorm = 62.7379, GNorm = 2.3677, lr_0 = 1.8052e-04
Loss = 3.5652e-01, PNorm = 62.7382, GNorm = 1.6023, lr_0 = 1.8039e-04
Loss = 3.6421e-01, PNorm = 62.7407, GNorm = 1.7581, lr_0 = 1.8027e-04
Loss = 3.2997e-01, PNorm = 62.7436, GNorm = 1.0109, lr_0 = 1.8015e-04
Loss = 3.3435e-01, PNorm = 62.7429, GNorm = 1.8459, lr_0 = 1.8002e-04
Loss = 3.9442e-01, PNorm = 62.7414, GNorm = 1.0488, lr_0 = 1.7990e-04
Loss = 3.4530e-01, PNorm = 62.7458, GNorm = 1.2220, lr_0 = 1.7978e-04
Loss = 3.0447e-01, PNorm = 62.7485, GNorm = 1.2856, lr_0 = 1.7965e-04
Loss = 3.7887e-01, PNorm = 62.7499, GNorm = 2.1570, lr_0 = 1.7953e-04
Loss = 3.3449e-01, PNorm = 62.7525, GNorm = 1.2768, lr_0 = 1.7941e-04
Loss = 3.6037e-01, PNorm = 62.7553, GNorm = 1.2595, lr_0 = 1.7928e-04
Loss = 3.6130e-01, PNorm = 62.7555, GNorm = 1.6665, lr_0 = 1.7916e-04
Loss = 3.5223e-01, PNorm = 62.7555, GNorm = 1.3650, lr_0 = 1.7904e-04
Loss = 3.9537e-01, PNorm = 62.7559, GNorm = 1.7010, lr_0 = 1.7892e-04
Loss = 3.4743e-01, PNorm = 62.7602, GNorm = 0.9568, lr_0 = 1.7879e-04
Loss = 3.6207e-01, PNorm = 62.7635, GNorm = 1.5807, lr_0 = 1.7867e-04
Loss = 3.6290e-01, PNorm = 62.7681, GNorm = 1.3308, lr_0 = 1.7855e-04
Loss = 4.0009e-01, PNorm = 62.7713, GNorm = 1.2551, lr_0 = 1.7843e-04
Loss = 3.4249e-01, PNorm = 62.7738, GNorm = 1.9376, lr_0 = 1.7830e-04
Loss = 3.6285e-01, PNorm = 62.7757, GNorm = 1.6308, lr_0 = 1.7818e-04
Loss = 3.9203e-01, PNorm = 62.7775, GNorm = 1.4919, lr_0 = 1.7806e-04
Loss = 3.6274e-01, PNorm = 62.7802, GNorm = 1.6862, lr_0 = 1.7794e-04
Loss = 3.8349e-01, PNorm = 62.7796, GNorm = 1.6570, lr_0 = 1.7782e-04
Validation mae = 0.112189
Epoch 23
Loss = 3.4636e-01, PNorm = 62.7824, GNorm = 1.3447, lr_0 = 1.7769e-04
Loss = 3.4593e-01, PNorm = 62.7849, GNorm = 1.3834, lr_0 = 1.7757e-04
Loss = 3.8052e-01, PNorm = 62.7862, GNorm = 1.7642, lr_0 = 1.7745e-04
Loss = 3.4938e-01, PNorm = 62.7875, GNorm = 1.3268, lr_0 = 1.7733e-04
Loss = 3.8625e-01, PNorm = 62.7892, GNorm = 1.4938, lr_0 = 1.7721e-04
Loss = 4.0310e-01, PNorm = 62.7885, GNorm = 2.2905, lr_0 = 1.7709e-04
Loss = 3.4885e-01, PNorm = 62.7904, GNorm = 1.3389, lr_0 = 1.7696e-04
Loss = 3.2922e-01, PNorm = 62.7942, GNorm = 1.4878, lr_0 = 1.7684e-04
Loss = 3.9021e-01, PNorm = 62.7913, GNorm = 1.3986, lr_0 = 1.7672e-04
Loss = 3.4567e-01, PNorm = 62.7936, GNorm = 1.7638, lr_0 = 1.7660e-04
Loss = 3.4891e-01, PNorm = 62.7967, GNorm = 1.7942, lr_0 = 1.7648e-04
Loss = 3.5405e-01, PNorm = 62.7975, GNorm = 1.8089, lr_0 = 1.7636e-04
Loss = 3.3175e-01, PNorm = 62.7993, GNorm = 1.7054, lr_0 = 1.7624e-04
Loss = 3.4084e-01, PNorm = 62.8005, GNorm = 1.9373, lr_0 = 1.7612e-04
Loss = 2.9087e-01, PNorm = 62.8016, GNorm = 1.2606, lr_0 = 1.7600e-04
Loss = 3.1330e-01, PNorm = 62.8061, GNorm = 1.5545, lr_0 = 1.7588e-04
Loss = 3.5476e-01, PNorm = 62.8081, GNorm = 1.4877, lr_0 = 1.7576e-04
Loss = 3.6941e-01, PNorm = 62.8080, GNorm = 1.8392, lr_0 = 1.7564e-04
Loss = 3.9540e-01, PNorm = 62.8102, GNorm = 1.4932, lr_0 = 1.7552e-04
Loss = 3.4371e-01, PNorm = 62.8146, GNorm = 1.6103, lr_0 = 1.7540e-04
Loss = 3.9066e-01, PNorm = 62.8186, GNorm = 1.1506, lr_0 = 1.7528e-04
Loss = 3.2644e-01, PNorm = 62.8195, GNorm = 1.4386, lr_0 = 1.7516e-04
Loss = 3.6467e-01, PNorm = 62.8222, GNorm = 1.5700, lr_0 = 1.7504e-04
Loss = 3.4788e-01, PNorm = 62.8245, GNorm = 1.3622, lr_0 = 1.7492e-04
Loss = 3.5761e-01, PNorm = 62.8241, GNorm = 1.0577, lr_0 = 1.7480e-04
Loss = 3.2771e-01, PNorm = 62.8241, GNorm = 1.4671, lr_0 = 1.7468e-04
Loss = 3.5005e-01, PNorm = 62.8260, GNorm = 1.4319, lr_0 = 1.7456e-04
Loss = 3.6703e-01, PNorm = 62.8277, GNorm = 1.7700, lr_0 = 1.7444e-04
Loss = 3.5344e-01, PNorm = 62.8302, GNorm = 1.5523, lr_0 = 1.7432e-04
Loss = 3.8549e-01, PNorm = 62.8310, GNorm = 1.1117, lr_0 = 1.7420e-04
Loss = 4.0795e-01, PNorm = 62.8349, GNorm = 1.6490, lr_0 = 1.7408e-04
Loss = 3.1545e-01, PNorm = 62.8350, GNorm = 1.0327, lr_0 = 1.7396e-04
Loss = 4.1406e-01, PNorm = 62.8374, GNorm = 1.5655, lr_0 = 1.7384e-04
Loss = 4.0509e-01, PNorm = 62.8388, GNorm = 1.4507, lr_0 = 1.7372e-04
Loss = 3.2831e-01, PNorm = 62.8434, GNorm = 1.1215, lr_0 = 1.7360e-04
Loss = 3.5618e-01, PNorm = 62.8416, GNorm = 2.0655, lr_0 = 1.7348e-04
Loss = 4.1203e-01, PNorm = 62.8436, GNorm = 1.5355, lr_0 = 1.7336e-04
Loss = 3.8696e-01, PNorm = 62.8484, GNorm = 0.9584, lr_0 = 1.7325e-04
Loss = 3.3882e-01, PNorm = 62.8480, GNorm = 1.3548, lr_0 = 1.7313e-04
Loss = 3.4547e-01, PNorm = 62.8467, GNorm = 0.9413, lr_0 = 1.7301e-04
Loss = 3.7141e-01, PNorm = 62.8490, GNorm = 1.6856, lr_0 = 1.7289e-04
Loss = 3.5333e-01, PNorm = 62.8517, GNorm = 1.3005, lr_0 = 1.7277e-04
Loss = 3.6714e-01, PNorm = 62.8535, GNorm = 1.4810, lr_0 = 1.7265e-04
Loss = 3.2927e-01, PNorm = 62.8561, GNorm = 1.2673, lr_0 = 1.7253e-04
Loss = 3.7305e-01, PNorm = 62.8579, GNorm = 1.3036, lr_0 = 1.7242e-04
Loss = 4.1507e-01, PNorm = 62.8606, GNorm = 1.5703, lr_0 = 1.7230e-04
Loss = 3.4177e-01, PNorm = 62.8642, GNorm = 1.2805, lr_0 = 1.7218e-04
Loss = 3.7865e-01, PNorm = 62.8639, GNorm = 1.7465, lr_0 = 1.7206e-04
Loss = 3.0314e-01, PNorm = 62.8661, GNorm = 1.1580, lr_0 = 1.7194e-04
Loss = 3.4258e-01, PNorm = 62.8669, GNorm = 2.0205, lr_0 = 1.7183e-04
Loss = 3.8055e-01, PNorm = 62.8669, GNorm = 1.4527, lr_0 = 1.7171e-04
Loss = 3.8063e-01, PNorm = 62.8681, GNorm = 1.4452, lr_0 = 1.7159e-04
Loss = 3.8288e-01, PNorm = 62.8700, GNorm = 1.6515, lr_0 = 1.7147e-04
Loss = 3.8127e-01, PNorm = 62.8717, GNorm = 1.5114, lr_0 = 1.7136e-04
Loss = 3.8224e-01, PNorm = 62.8724, GNorm = 1.0395, lr_0 = 1.7124e-04
Loss = 3.7066e-01, PNorm = 62.8767, GNorm = 1.6383, lr_0 = 1.7112e-04
Loss = 3.4215e-01, PNorm = 62.8793, GNorm = 1.5984, lr_0 = 1.7100e-04
Loss = 3.2526e-01, PNorm = 62.8794, GNorm = 1.3624, lr_0 = 1.7089e-04
Loss = 3.8091e-01, PNorm = 62.8816, GNorm = 1.7998, lr_0 = 1.7077e-04
Loss = 3.3838e-01, PNorm = 62.8849, GNorm = 1.6376, lr_0 = 1.7065e-04
Loss = 3.9515e-01, PNorm = 62.8836, GNorm = 2.0008, lr_0 = 1.7054e-04
Loss = 3.3201e-01, PNorm = 62.8860, GNorm = 1.4953, lr_0 = 1.7042e-04
Loss = 3.9715e-01, PNorm = 62.8892, GNorm = 1.9243, lr_0 = 1.7030e-04
Loss = 3.4158e-01, PNorm = 62.8896, GNorm = 1.3771, lr_0 = 1.7019e-04
Loss = 2.9977e-01, PNorm = 62.8925, GNorm = 0.9577, lr_0 = 1.7007e-04
Loss = 3.7074e-01, PNorm = 62.8917, GNorm = 1.2530, lr_0 = 1.6995e-04
Loss = 3.2524e-01, PNorm = 62.8919, GNorm = 1.3584, lr_0 = 1.6984e-04
Loss = 3.6764e-01, PNorm = 62.8938, GNorm = 1.6901, lr_0 = 1.6972e-04
Loss = 3.6603e-01, PNorm = 62.8959, GNorm = 1.6184, lr_0 = 1.6960e-04
Loss = 3.9966e-01, PNorm = 62.8959, GNorm = 1.2705, lr_0 = 1.6949e-04
Loss = 3.5674e-01, PNorm = 62.8971, GNorm = 1.5401, lr_0 = 1.6937e-04
Loss = 3.2271e-01, PNorm = 62.8999, GNorm = 1.4744, lr_0 = 1.6926e-04
Loss = 3.2068e-01, PNorm = 62.9033, GNorm = 1.6043, lr_0 = 1.6914e-04
Loss = 3.7453e-01, PNorm = 62.9051, GNorm = 1.5612, lr_0 = 1.6902e-04
Loss = 3.4984e-01, PNorm = 62.9070, GNorm = 1.7641, lr_0 = 1.6891e-04
Loss = 3.7435e-01, PNorm = 62.9094, GNorm = 1.7603, lr_0 = 1.6879e-04
Loss = 3.4420e-01, PNorm = 62.9122, GNorm = 1.4982, lr_0 = 1.6868e-04
Loss = 3.6271e-01, PNorm = 62.9132, GNorm = 1.2992, lr_0 = 1.6856e-04
Loss = 3.6558e-01, PNorm = 62.9114, GNorm = 1.4048, lr_0 = 1.6845e-04
Loss = 3.7795e-01, PNorm = 62.9125, GNorm = 1.9918, lr_0 = 1.6833e-04
Loss = 3.4686e-01, PNorm = 62.9135, GNorm = 1.0413, lr_0 = 1.6821e-04
Loss = 3.4345e-01, PNorm = 62.9155, GNorm = 1.2156, lr_0 = 1.6810e-04
Loss = 3.0168e-01, PNorm = 62.9161, GNorm = 1.7000, lr_0 = 1.6798e-04
Loss = 3.7055e-01, PNorm = 62.9144, GNorm = 1.8124, lr_0 = 1.6787e-04
Loss = 3.5181e-01, PNorm = 62.9155, GNorm = 1.1233, lr_0 = 1.6775e-04
Loss = 3.4077e-01, PNorm = 62.9175, GNorm = 2.4401, lr_0 = 1.6764e-04
Loss = 3.1204e-01, PNorm = 62.9210, GNorm = 1.2625, lr_0 = 1.6752e-04
Loss = 3.4614e-01, PNorm = 62.9227, GNorm = 1.4627, lr_0 = 1.6741e-04
Loss = 3.7649e-01, PNorm = 62.9251, GNorm = 1.4980, lr_0 = 1.6729e-04
Loss = 3.5306e-01, PNorm = 62.9291, GNorm = 1.8141, lr_0 = 1.6718e-04
Loss = 3.3751e-01, PNorm = 62.9307, GNorm = 2.1940, lr_0 = 1.6707e-04
Loss = 3.6393e-01, PNorm = 62.9320, GNorm = 2.9346, lr_0 = 1.6695e-04
Loss = 3.5036e-01, PNorm = 62.9338, GNorm = 1.5958, lr_0 = 1.6684e-04
Loss = 3.2072e-01, PNorm = 62.9369, GNorm = 1.0775, lr_0 = 1.6672e-04
Loss = 3.2316e-01, PNorm = 62.9387, GNorm = 1.4108, lr_0 = 1.6661e-04
Loss = 3.3329e-01, PNorm = 62.9375, GNorm = 1.6803, lr_0 = 1.6649e-04
Loss = 3.4290e-01, PNorm = 62.9367, GNorm = 1.8189, lr_0 = 1.6638e-04
Loss = 2.9421e-01, PNorm = 62.9400, GNorm = 1.3004, lr_0 = 1.6627e-04
Loss = 4.0225e-01, PNorm = 62.9416, GNorm = 2.3667, lr_0 = 1.6615e-04
Loss = 3.7215e-01, PNorm = 62.9429, GNorm = 1.1411, lr_0 = 1.6604e-04
Loss = 3.2895e-01, PNorm = 62.9450, GNorm = 1.1759, lr_0 = 1.6592e-04
Loss = 3.7057e-01, PNorm = 62.9476, GNorm = 1.6916, lr_0 = 1.6581e-04
Loss = 3.0016e-01, PNorm = 62.9491, GNorm = 1.3833, lr_0 = 1.6570e-04
Loss = 3.7666e-01, PNorm = 62.9513, GNorm = 1.5391, lr_0 = 1.6558e-04
Loss = 3.6504e-01, PNorm = 62.9529, GNorm = 1.6713, lr_0 = 1.6547e-04
Loss = 3.2921e-01, PNorm = 62.9573, GNorm = 1.5494, lr_0 = 1.6536e-04
Loss = 3.6299e-01, PNorm = 62.9583, GNorm = 1.3557, lr_0 = 1.6524e-04
Loss = 3.2933e-01, PNorm = 62.9590, GNorm = 1.3765, lr_0 = 1.6513e-04
Loss = 3.7022e-01, PNorm = 62.9598, GNorm = 1.2365, lr_0 = 1.6502e-04
Loss = 3.2435e-01, PNorm = 62.9633, GNorm = 1.2228, lr_0 = 1.6490e-04
Loss = 3.3719e-01, PNorm = 62.9673, GNorm = 1.7636, lr_0 = 1.6479e-04
Loss = 3.4275e-01, PNorm = 62.9693, GNorm = 1.3255, lr_0 = 1.6468e-04
Loss = 3.6417e-01, PNorm = 62.9686, GNorm = 1.5990, lr_0 = 1.6457e-04
Loss = 3.6156e-01, PNorm = 62.9726, GNorm = 1.7005, lr_0 = 1.6445e-04
Loss = 3.4563e-01, PNorm = 62.9732, GNorm = 1.3819, lr_0 = 1.6434e-04
Loss = 3.8431e-01, PNorm = 62.9721, GNorm = 1.7464, lr_0 = 1.6423e-04
Loss = 4.0453e-01, PNorm = 62.9754, GNorm = 1.8430, lr_0 = 1.6412e-04
Loss = 3.4259e-01, PNorm = 62.9802, GNorm = 1.4337, lr_0 = 1.6400e-04
Loss = 3.9878e-01, PNorm = 62.9817, GNorm = 1.5049, lr_0 = 1.6389e-04
Loss = 3.5546e-01, PNorm = 62.9826, GNorm = 1.4513, lr_0 = 1.6378e-04
Validation mae = 0.111911
Epoch 24
Loss = 3.2124e-01, PNorm = 62.9856, GNorm = 1.0340, lr_0 = 1.6367e-04
Loss = 3.6327e-01, PNorm = 62.9888, GNorm = 1.2707, lr_0 = 1.6355e-04
Loss = 3.7184e-01, PNorm = 62.9888, GNorm = 1.2175, lr_0 = 1.6344e-04
Loss = 3.3912e-01, PNorm = 62.9915, GNorm = 1.8371, lr_0 = 1.6333e-04
Loss = 3.4738e-01, PNorm = 62.9932, GNorm = 1.6350, lr_0 = 1.6322e-04
Loss = 3.9144e-01, PNorm = 62.9930, GNorm = 1.4380, lr_0 = 1.6311e-04
Loss = 3.2041e-01, PNorm = 62.9934, GNorm = 1.4444, lr_0 = 1.6299e-04
Loss = 3.0553e-01, PNorm = 62.9951, GNorm = 1.3297, lr_0 = 1.6288e-04
Loss = 3.7425e-01, PNorm = 62.9951, GNorm = 2.4406, lr_0 = 1.6277e-04
Loss = 3.1715e-01, PNorm = 62.9970, GNorm = 1.2621, lr_0 = 1.6266e-04
Loss = 3.5615e-01, PNorm = 62.9993, GNorm = 1.2510, lr_0 = 1.6255e-04
Loss = 3.3996e-01, PNorm = 63.0026, GNorm = 1.3701, lr_0 = 1.6244e-04
Loss = 3.6243e-01, PNorm = 63.0017, GNorm = 2.2585, lr_0 = 1.6233e-04
Loss = 3.5209e-01, PNorm = 63.0017, GNorm = 1.5046, lr_0 = 1.6221e-04
Loss = 3.6096e-01, PNorm = 63.0043, GNorm = 1.6943, lr_0 = 1.6210e-04
Loss = 3.5456e-01, PNorm = 63.0046, GNorm = 1.5776, lr_0 = 1.6199e-04
Loss = 3.4228e-01, PNorm = 63.0060, GNorm = 1.3937, lr_0 = 1.6188e-04
Loss = 2.9376e-01, PNorm = 63.0097, GNorm = 1.2542, lr_0 = 1.6177e-04
Loss = 4.0596e-01, PNorm = 63.0103, GNorm = 1.4097, lr_0 = 1.6166e-04
Loss = 3.1740e-01, PNorm = 63.0105, GNorm = 1.1639, lr_0 = 1.6155e-04
Loss = 3.6114e-01, PNorm = 63.0120, GNorm = 1.5318, lr_0 = 1.6144e-04
Loss = 3.0661e-01, PNorm = 63.0116, GNorm = 1.2215, lr_0 = 1.6133e-04
Loss = 3.9651e-01, PNorm = 63.0133, GNorm = 2.3123, lr_0 = 1.6122e-04
Loss = 3.2586e-01, PNorm = 63.0165, GNorm = 1.0811, lr_0 = 1.6111e-04
Loss = 3.5335e-01, PNorm = 63.0165, GNorm = 1.7470, lr_0 = 1.6100e-04
Loss = 3.7645e-01, PNorm = 63.0193, GNorm = 1.4358, lr_0 = 1.6089e-04
Loss = 3.1912e-01, PNorm = 63.0203, GNorm = 1.3526, lr_0 = 1.6078e-04
Loss = 3.4742e-01, PNorm = 63.0233, GNorm = 2.1936, lr_0 = 1.6067e-04
Loss = 3.0911e-01, PNorm = 63.0241, GNorm = 1.0131, lr_0 = 1.6056e-04
Loss = 3.3407e-01, PNorm = 63.0245, GNorm = 1.2838, lr_0 = 1.6045e-04
Loss = 3.9221e-01, PNorm = 63.0266, GNorm = 1.3896, lr_0 = 1.6034e-04
Loss = 3.5188e-01, PNorm = 63.0275, GNorm = 1.6971, lr_0 = 1.6023e-04
Loss = 3.4955e-01, PNorm = 63.0289, GNorm = 1.9894, lr_0 = 1.6012e-04
Loss = 3.5368e-01, PNorm = 63.0309, GNorm = 1.6165, lr_0 = 1.6001e-04
Loss = 3.0111e-01, PNorm = 63.0343, GNorm = 1.3744, lr_0 = 1.5990e-04
Loss = 3.4433e-01, PNorm = 63.0353, GNorm = 1.5255, lr_0 = 1.5979e-04
Loss = 3.6042e-01, PNorm = 63.0369, GNorm = 1.7917, lr_0 = 1.5968e-04
Loss = 3.4617e-01, PNorm = 63.0397, GNorm = 1.5381, lr_0 = 1.5957e-04
Loss = 3.1461e-01, PNorm = 63.0413, GNorm = 1.5975, lr_0 = 1.5946e-04
Loss = 3.2668e-01, PNorm = 63.0433, GNorm = 1.3559, lr_0 = 1.5935e-04
Loss = 3.6141e-01, PNorm = 63.0465, GNorm = 1.7567, lr_0 = 1.5924e-04
Loss = 3.9643e-01, PNorm = 63.0496, GNorm = 1.8819, lr_0 = 1.5913e-04
Loss = 3.7913e-01, PNorm = 63.0514, GNorm = 1.2515, lr_0 = 1.5902e-04
Loss = 3.0746e-01, PNorm = 63.0534, GNorm = 1.2032, lr_0 = 1.5891e-04
Loss = 3.4941e-01, PNorm = 63.0559, GNorm = 1.2590, lr_0 = 1.5880e-04
Loss = 3.2648e-01, PNorm = 63.0571, GNorm = 1.4939, lr_0 = 1.5870e-04
Loss = 3.8226e-01, PNorm = 63.0580, GNorm = 2.2241, lr_0 = 1.5859e-04
Loss = 3.6641e-01, PNorm = 63.0590, GNorm = 2.0441, lr_0 = 1.5848e-04
Loss = 3.6096e-01, PNorm = 63.0606, GNorm = 1.5285, lr_0 = 1.5837e-04
Loss = 3.5192e-01, PNorm = 63.0614, GNorm = 2.6215, lr_0 = 1.5826e-04
Loss = 3.5431e-01, PNorm = 63.0632, GNorm = 1.1700, lr_0 = 1.5815e-04
Loss = 3.4740e-01, PNorm = 63.0674, GNorm = 1.1611, lr_0 = 1.5804e-04
Loss = 2.9297e-01, PNorm = 63.0681, GNorm = 1.4408, lr_0 = 1.5794e-04
Loss = 3.2161e-01, PNorm = 63.0682, GNorm = 1.6805, lr_0 = 1.5783e-04
Loss = 3.6622e-01, PNorm = 63.0711, GNorm = 1.3819, lr_0 = 1.5772e-04
Loss = 4.0339e-01, PNorm = 63.0729, GNorm = 1.2440, lr_0 = 1.5761e-04
Loss = 2.9665e-01, PNorm = 63.0760, GNorm = 1.4265, lr_0 = 1.5750e-04
Loss = 3.4153e-01, PNorm = 63.0767, GNorm = 1.1255, lr_0 = 1.5740e-04
Loss = 3.0085e-01, PNorm = 63.0766, GNorm = 1.3906, lr_0 = 1.5729e-04
Loss = 4.1789e-01, PNorm = 63.0776, GNorm = 1.2783, lr_0 = 1.5718e-04
Loss = 3.6564e-01, PNorm = 63.0799, GNorm = 1.1712, lr_0 = 1.5707e-04
Loss = 3.7423e-01, PNorm = 63.0803, GNorm = 1.1645, lr_0 = 1.5697e-04
Loss = 3.3932e-01, PNorm = 63.0814, GNorm = 1.2095, lr_0 = 1.5686e-04
Loss = 3.5506e-01, PNorm = 63.0810, GNorm = 1.6469, lr_0 = 1.5675e-04
Loss = 3.6838e-01, PNorm = 63.0849, GNorm = 1.2207, lr_0 = 1.5664e-04
Loss = 3.3081e-01, PNorm = 63.0854, GNorm = 1.9371, lr_0 = 1.5654e-04
Loss = 3.4192e-01, PNorm = 63.0876, GNorm = 1.3793, lr_0 = 1.5643e-04
Loss = 3.5311e-01, PNorm = 63.0901, GNorm = 1.7341, lr_0 = 1.5632e-04
Loss = 2.8260e-01, PNorm = 63.0909, GNorm = 1.4654, lr_0 = 1.5621e-04
Loss = 3.6777e-01, PNorm = 63.0928, GNorm = 2.7466, lr_0 = 1.5611e-04
Loss = 3.5353e-01, PNorm = 63.0942, GNorm = 1.3382, lr_0 = 1.5600e-04
Loss = 3.1453e-01, PNorm = 63.0950, GNorm = 1.4474, lr_0 = 1.5589e-04
Loss = 3.3656e-01, PNorm = 63.0971, GNorm = 1.5770, lr_0 = 1.5579e-04
Loss = 3.7021e-01, PNorm = 63.0980, GNorm = 1.4137, lr_0 = 1.5568e-04
Loss = 3.6820e-01, PNorm = 63.0995, GNorm = 1.8304, lr_0 = 1.5557e-04
Loss = 3.5963e-01, PNorm = 63.1002, GNorm = 1.2402, lr_0 = 1.5547e-04
Loss = 3.6426e-01, PNorm = 63.1016, GNorm = 1.3581, lr_0 = 1.5536e-04
Loss = 3.5109e-01, PNorm = 63.1034, GNorm = 1.1444, lr_0 = 1.5525e-04
Loss = 3.6330e-01, PNorm = 63.1065, GNorm = 1.6521, lr_0 = 1.5515e-04
Loss = 3.7503e-01, PNorm = 63.1066, GNorm = 1.4897, lr_0 = 1.5504e-04
Loss = 3.8094e-01, PNorm = 63.1065, GNorm = 1.7227, lr_0 = 1.5493e-04
Loss = 3.6725e-01, PNorm = 63.1086, GNorm = 1.6283, lr_0 = 1.5483e-04
Loss = 3.7092e-01, PNorm = 63.1107, GNorm = 1.6279, lr_0 = 1.5472e-04
Loss = 4.0578e-01, PNorm = 63.1119, GNorm = 1.1752, lr_0 = 1.5462e-04
Loss = 3.4107e-01, PNorm = 63.1140, GNorm = 1.7452, lr_0 = 1.5451e-04
Loss = 3.3839e-01, PNorm = 63.1175, GNorm = 1.5666, lr_0 = 1.5440e-04
Loss = 3.2827e-01, PNorm = 63.1173, GNorm = 1.0012, lr_0 = 1.5430e-04
Loss = 3.6698e-01, PNorm = 63.1177, GNorm = 1.3717, lr_0 = 1.5419e-04
Loss = 3.3785e-01, PNorm = 63.1208, GNorm = 1.3840, lr_0 = 1.5409e-04
Loss = 3.6401e-01, PNorm = 63.1234, GNorm = 1.8138, lr_0 = 1.5398e-04
Loss = 3.5401e-01, PNorm = 63.1222, GNorm = 1.2616, lr_0 = 1.5388e-04
Loss = 3.4623e-01, PNorm = 63.1221, GNorm = 1.2761, lr_0 = 1.5377e-04
Loss = 3.3698e-01, PNorm = 63.1230, GNorm = 0.9427, lr_0 = 1.5367e-04
Loss = 2.8500e-01, PNorm = 63.1242, GNorm = 1.5701, lr_0 = 1.5356e-04
Loss = 3.2802e-01, PNorm = 63.1261, GNorm = 1.4672, lr_0 = 1.5346e-04
Loss = 3.1321e-01, PNorm = 63.1296, GNorm = 1.6218, lr_0 = 1.5335e-04
Loss = 3.8187e-01, PNorm = 63.1320, GNorm = 1.3491, lr_0 = 1.5325e-04
Loss = 3.7099e-01, PNorm = 63.1338, GNorm = 1.8850, lr_0 = 1.5314e-04
Loss = 3.2182e-01, PNorm = 63.1347, GNorm = 1.0182, lr_0 = 1.5304e-04
Loss = 3.7575e-01, PNorm = 63.1348, GNorm = 2.4230, lr_0 = 1.5293e-04
Loss = 3.8624e-01, PNorm = 63.1361, GNorm = 2.4261, lr_0 = 1.5283e-04
Loss = 3.6731e-01, PNorm = 63.1346, GNorm = 0.9210, lr_0 = 1.5272e-04
Loss = 3.7056e-01, PNorm = 63.1372, GNorm = 1.7515, lr_0 = 1.5262e-04
Loss = 3.4444e-01, PNorm = 63.1393, GNorm = 1.8110, lr_0 = 1.5251e-04
Loss = 3.3226e-01, PNorm = 63.1381, GNorm = 1.5845, lr_0 = 1.5241e-04
Loss = 3.8545e-01, PNorm = 63.1398, GNorm = 1.9403, lr_0 = 1.5230e-04
Loss = 3.3725e-01, PNorm = 63.1425, GNorm = 1.1262, lr_0 = 1.5220e-04
Loss = 3.5252e-01, PNorm = 63.1443, GNorm = 1.5595, lr_0 = 1.5209e-04
Loss = 3.7714e-01, PNorm = 63.1464, GNorm = 1.5041, lr_0 = 1.5199e-04
Loss = 3.6533e-01, PNorm = 63.1484, GNorm = 2.5467, lr_0 = 1.5189e-04
Loss = 3.5619e-01, PNorm = 63.1487, GNorm = 1.3444, lr_0 = 1.5178e-04
Loss = 3.3246e-01, PNorm = 63.1489, GNorm = 1.6236, lr_0 = 1.5168e-04
Loss = 3.4901e-01, PNorm = 63.1507, GNorm = 1.2135, lr_0 = 1.5157e-04
Loss = 3.6937e-01, PNorm = 63.1540, GNorm = 1.1815, lr_0 = 1.5147e-04
Loss = 3.3558e-01, PNorm = 63.1563, GNorm = 1.4109, lr_0 = 1.5137e-04
Loss = 3.3875e-01, PNorm = 63.1561, GNorm = 1.0866, lr_0 = 1.5126e-04
Loss = 3.9356e-01, PNorm = 63.1578, GNorm = 1.7231, lr_0 = 1.5116e-04
Loss = 3.7071e-01, PNorm = 63.1601, GNorm = 1.8864, lr_0 = 1.5106e-04
Loss = 3.7681e-01, PNorm = 63.1612, GNorm = 1.5578, lr_0 = 1.5095e-04
Loss = 3.9388e-01, PNorm = 63.1614, GNorm = 1.8203, lr_0 = 1.5085e-04
Validation mae = 0.112526
Epoch 25
Loss = 3.3422e-01, PNorm = 63.1629, GNorm = 1.3359, lr_0 = 1.5075e-04
Loss = 3.2610e-01, PNorm = 63.1633, GNorm = 1.7324, lr_0 = 1.5064e-04
Loss = 3.4812e-01, PNorm = 63.1616, GNorm = 1.4432, lr_0 = 1.5054e-04
Loss = 3.6115e-01, PNorm = 63.1639, GNorm = 1.3606, lr_0 = 1.5044e-04
Loss = 3.7609e-01, PNorm = 63.1686, GNorm = 1.1916, lr_0 = 1.5033e-04
Loss = 3.5382e-01, PNorm = 63.1719, GNorm = 1.9022, lr_0 = 1.5023e-04
Loss = 3.7431e-01, PNorm = 63.1749, GNorm = 1.5030, lr_0 = 1.5013e-04
Loss = 3.5055e-01, PNorm = 63.1748, GNorm = 1.9797, lr_0 = 1.5002e-04
Loss = 3.6040e-01, PNorm = 63.1737, GNorm = 1.7784, lr_0 = 1.4992e-04
Loss = 3.0942e-01, PNorm = 63.1773, GNorm = 1.3783, lr_0 = 1.4982e-04
Loss = 3.3705e-01, PNorm = 63.1788, GNorm = 1.3196, lr_0 = 1.4972e-04
Loss = 3.6713e-01, PNorm = 63.1796, GNorm = 1.6075, lr_0 = 1.4961e-04
Loss = 3.9105e-01, PNorm = 63.1802, GNorm = 1.2477, lr_0 = 1.4951e-04
Loss = 3.2448e-01, PNorm = 63.1828, GNorm = 2.3691, lr_0 = 1.4941e-04
Loss = 3.3863e-01, PNorm = 63.1856, GNorm = 2.0228, lr_0 = 1.4931e-04
Loss = 3.0677e-01, PNorm = 63.1858, GNorm = 1.4945, lr_0 = 1.4920e-04
Loss = 3.4666e-01, PNorm = 63.1877, GNorm = 1.4503, lr_0 = 1.4910e-04
Loss = 3.5718e-01, PNorm = 63.1889, GNorm = 1.9960, lr_0 = 1.4900e-04
Loss = 2.8525e-01, PNorm = 63.1876, GNorm = 1.3512, lr_0 = 1.4890e-04
Loss = 3.6838e-01, PNorm = 63.1873, GNorm = 1.0423, lr_0 = 1.4880e-04
Loss = 3.2434e-01, PNorm = 63.1894, GNorm = 1.0823, lr_0 = 1.4869e-04
Loss = 3.1571e-01, PNorm = 63.1930, GNorm = 1.0892, lr_0 = 1.4859e-04
Loss = 3.3820e-01, PNorm = 63.1937, GNorm = 1.2341, lr_0 = 1.4849e-04
Loss = 4.4203e-01, PNorm = 63.1935, GNorm = 1.5273, lr_0 = 1.4839e-04
Loss = 3.5828e-01, PNorm = 63.1955, GNorm = 1.6946, lr_0 = 1.4829e-04
Loss = 3.6286e-01, PNorm = 63.1956, GNorm = 1.3597, lr_0 = 1.4818e-04
Loss = 3.8515e-01, PNorm = 63.1963, GNorm = 1.6951, lr_0 = 1.4808e-04
Loss = 3.7097e-01, PNorm = 63.1987, GNorm = 1.3335, lr_0 = 1.4798e-04
Loss = 3.2284e-01, PNorm = 63.2006, GNorm = 1.2432, lr_0 = 1.4788e-04
Loss = 3.4597e-01, PNorm = 63.2038, GNorm = 1.3143, lr_0 = 1.4778e-04
Loss = 3.6146e-01, PNorm = 63.2062, GNorm = 1.4255, lr_0 = 1.4768e-04
Loss = 3.6798e-01, PNorm = 63.2064, GNorm = 1.7825, lr_0 = 1.4758e-04
Loss = 3.5251e-01, PNorm = 63.2091, GNorm = 1.3216, lr_0 = 1.4748e-04
Loss = 3.3125e-01, PNorm = 63.2096, GNorm = 1.3263, lr_0 = 1.4737e-04
Loss = 3.2265e-01, PNorm = 63.2118, GNorm = 1.7534, lr_0 = 1.4727e-04
Loss = 3.7850e-01, PNorm = 63.2127, GNorm = 1.3980, lr_0 = 1.4717e-04
Loss = 3.3840e-01, PNorm = 63.2134, GNorm = 1.2726, lr_0 = 1.4707e-04
Loss = 2.7967e-01, PNorm = 63.2146, GNorm = 1.0920, lr_0 = 1.4697e-04
Loss = 3.6020e-01, PNorm = 63.2157, GNorm = 1.5167, lr_0 = 1.4687e-04
Loss = 3.2030e-01, PNorm = 63.2171, GNorm = 1.0871, lr_0 = 1.4677e-04
Loss = 3.2757e-01, PNorm = 63.2176, GNorm = 1.4640, lr_0 = 1.4667e-04
Loss = 3.9329e-01, PNorm = 63.2168, GNorm = 1.7369, lr_0 = 1.4657e-04
Loss = 3.1197e-01, PNorm = 63.2173, GNorm = 2.0275, lr_0 = 1.4647e-04
Loss = 3.6545e-01, PNorm = 63.2199, GNorm = 1.3995, lr_0 = 1.4637e-04
Loss = 3.8111e-01, PNorm = 63.2210, GNorm = 1.1399, lr_0 = 1.4627e-04
Loss = 3.2610e-01, PNorm = 63.2243, GNorm = 1.4774, lr_0 = 1.4617e-04
Loss = 3.2981e-01, PNorm = 63.2275, GNorm = 1.4499, lr_0 = 1.4607e-04
Loss = 3.2526e-01, PNorm = 63.2300, GNorm = 1.4726, lr_0 = 1.4597e-04
Loss = 3.6114e-01, PNorm = 63.2287, GNorm = 1.3432, lr_0 = 1.4587e-04
Loss = 3.6415e-01, PNorm = 63.2299, GNorm = 1.3477, lr_0 = 1.4577e-04
Loss = 3.4201e-01, PNorm = 63.2333, GNorm = 1.8591, lr_0 = 1.4567e-04
Loss = 3.6879e-01, PNorm = 63.2384, GNorm = 2.3970, lr_0 = 1.4557e-04
Loss = 3.4509e-01, PNorm = 63.2377, GNorm = 2.0132, lr_0 = 1.4547e-04
Loss = 3.6697e-01, PNorm = 63.2387, GNorm = 1.2968, lr_0 = 1.4537e-04
Loss = 3.5682e-01, PNorm = 63.2418, GNorm = 1.4029, lr_0 = 1.4527e-04
Loss = 3.4551e-01, PNorm = 63.2421, GNorm = 1.2271, lr_0 = 1.4517e-04
Loss = 3.7712e-01, PNorm = 63.2411, GNorm = 1.6919, lr_0 = 1.4507e-04
Loss = 3.8876e-01, PNorm = 63.2439, GNorm = 1.5029, lr_0 = 1.4497e-04
Loss = 3.4511e-01, PNorm = 63.2439, GNorm = 1.2409, lr_0 = 1.4487e-04
Loss = 3.4435e-01, PNorm = 63.2447, GNorm = 1.4721, lr_0 = 1.4477e-04
Loss = 3.6647e-01, PNorm = 63.2466, GNorm = 1.3352, lr_0 = 1.4467e-04
Loss = 3.8292e-01, PNorm = 63.2477, GNorm = 1.6895, lr_0 = 1.4457e-04
Loss = 3.4146e-01, PNorm = 63.2486, GNorm = 1.4506, lr_0 = 1.4447e-04
Loss = 3.4167e-01, PNorm = 63.2508, GNorm = 1.1507, lr_0 = 1.4438e-04
Loss = 3.2916e-01, PNorm = 63.2517, GNorm = 1.6137, lr_0 = 1.4428e-04
Loss = 3.3780e-01, PNorm = 63.2536, GNorm = 1.8231, lr_0 = 1.4418e-04
Loss = 3.2138e-01, PNorm = 63.2524, GNorm = 1.7096, lr_0 = 1.4408e-04
Loss = 3.5889e-01, PNorm = 63.2519, GNorm = 1.2934, lr_0 = 1.4398e-04
Loss = 3.2973e-01, PNorm = 63.2532, GNorm = 1.7459, lr_0 = 1.4388e-04
Loss = 3.2398e-01, PNorm = 63.2558, GNorm = 1.2731, lr_0 = 1.4378e-04
Loss = 3.3459e-01, PNorm = 63.2580, GNorm = 1.8110, lr_0 = 1.4368e-04
Loss = 3.2250e-01, PNorm = 63.2604, GNorm = 1.6693, lr_0 = 1.4359e-04
Loss = 3.6854e-01, PNorm = 63.2600, GNorm = 1.3695, lr_0 = 1.4349e-04
Loss = 4.1245e-01, PNorm = 63.2593, GNorm = 1.6694, lr_0 = 1.4339e-04
Loss = 3.3217e-01, PNorm = 63.2588, GNorm = 1.9956, lr_0 = 1.4329e-04
Loss = 3.2408e-01, PNorm = 63.2604, GNorm = 1.3825, lr_0 = 1.4319e-04
Loss = 3.6164e-01, PNorm = 63.2596, GNorm = 1.3263, lr_0 = 1.4310e-04
Loss = 3.3366e-01, PNorm = 63.2588, GNorm = 1.5200, lr_0 = 1.4300e-04
Loss = 3.7333e-01, PNorm = 63.2596, GNorm = 1.4523, lr_0 = 1.4290e-04
Loss = 3.6727e-01, PNorm = 63.2618, GNorm = 1.5176, lr_0 = 1.4280e-04
Loss = 4.0234e-01, PNorm = 63.2629, GNorm = 1.7096, lr_0 = 1.4270e-04
Loss = 3.6406e-01, PNorm = 63.2624, GNorm = 1.6056, lr_0 = 1.4261e-04
Loss = 3.5740e-01, PNorm = 63.2658, GNorm = 1.3153, lr_0 = 1.4251e-04
Loss = 3.7420e-01, PNorm = 63.2670, GNorm = 1.2881, lr_0 = 1.4241e-04
Loss = 3.0762e-01, PNorm = 63.2658, GNorm = 1.4055, lr_0 = 1.4231e-04
Loss = 3.6677e-01, PNorm = 63.2668, GNorm = 1.5331, lr_0 = 1.4222e-04
Loss = 3.1510e-01, PNorm = 63.2699, GNorm = 2.1634, lr_0 = 1.4212e-04
Loss = 3.1919e-01, PNorm = 63.2703, GNorm = 1.3299, lr_0 = 1.4202e-04
Loss = 3.4199e-01, PNorm = 63.2704, GNorm = 1.7938, lr_0 = 1.4192e-04
Loss = 3.2252e-01, PNorm = 63.2711, GNorm = 1.8785, lr_0 = 1.4183e-04
Loss = 4.0099e-01, PNorm = 63.2700, GNorm = 1.0389, lr_0 = 1.4173e-04
Loss = 3.4093e-01, PNorm = 63.2713, GNorm = 1.6971, lr_0 = 1.4163e-04
Loss = 3.5954e-01, PNorm = 63.2735, GNorm = 1.9817, lr_0 = 1.4153e-04
Loss = 3.5318e-01, PNorm = 63.2742, GNorm = 1.2056, lr_0 = 1.4144e-04
Loss = 3.4007e-01, PNorm = 63.2730, GNorm = 1.1622, lr_0 = 1.4134e-04
Loss = 3.9722e-01, PNorm = 63.2740, GNorm = 1.8571, lr_0 = 1.4124e-04
Loss = 3.3000e-01, PNorm = 63.2784, GNorm = 1.3148, lr_0 = 1.4115e-04
Loss = 3.2656e-01, PNorm = 63.2795, GNorm = 1.7056, lr_0 = 1.4105e-04
Loss = 3.5600e-01, PNorm = 63.2773, GNorm = 0.8886, lr_0 = 1.4095e-04
Loss = 3.6429e-01, PNorm = 63.2777, GNorm = 1.0410, lr_0 = 1.4086e-04
Loss = 3.7286e-01, PNorm = 63.2802, GNorm = 1.3577, lr_0 = 1.4076e-04
Loss = 3.2985e-01, PNorm = 63.2818, GNorm = 1.5203, lr_0 = 1.4066e-04
Loss = 3.5421e-01, PNorm = 63.2858, GNorm = 1.3875, lr_0 = 1.4057e-04
Loss = 3.2152e-01, PNorm = 63.2884, GNorm = 1.8960, lr_0 = 1.4047e-04
Loss = 3.4460e-01, PNorm = 63.2877, GNorm = 1.3940, lr_0 = 1.4038e-04
Loss = 3.0525e-01, PNorm = 63.2914, GNorm = 1.1849, lr_0 = 1.4028e-04
Loss = 4.3704e-01, PNorm = 63.2924, GNorm = 1.6490, lr_0 = 1.4018e-04
Loss = 3.6238e-01, PNorm = 63.2921, GNorm = 1.4514, lr_0 = 1.4009e-04
Loss = 3.1501e-01, PNorm = 63.2931, GNorm = 0.9575, lr_0 = 1.3999e-04
Loss = 4.6541e-01, PNorm = 63.2932, GNorm = 2.0085, lr_0 = 1.3990e-04
Loss = 3.3009e-01, PNorm = 63.2944, GNorm = 1.0131, lr_0 = 1.3980e-04
Loss = 3.0850e-01, PNorm = 63.2949, GNorm = 1.5152, lr_0 = 1.3970e-04
Loss = 3.2172e-01, PNorm = 63.2957, GNorm = 1.4432, lr_0 = 1.3961e-04
Loss = 3.6417e-01, PNorm = 63.2974, GNorm = 1.5203, lr_0 = 1.3951e-04
Loss = 3.6977e-01, PNorm = 63.2992, GNorm = 1.9519, lr_0 = 1.3942e-04
Loss = 3.6916e-01, PNorm = 63.3023, GNorm = 1.5825, lr_0 = 1.3932e-04
Loss = 3.4793e-01, PNorm = 63.3029, GNorm = 1.8939, lr_0 = 1.3923e-04
Loss = 4.0906e-01, PNorm = 63.3041, GNorm = 1.5827, lr_0 = 1.3913e-04
Loss = 3.6153e-01, PNorm = 63.3084, GNorm = 2.0410, lr_0 = 1.3904e-04
Loss = 3.5464e-01, PNorm = 63.3119, GNorm = 1.5855, lr_0 = 1.3894e-04
Validation mae = 0.111595
Epoch 26
Loss = 3.0883e-01, PNorm = 63.3125, GNorm = 1.5674, lr_0 = 1.3884e-04
Loss = 3.4522e-01, PNorm = 63.3148, GNorm = 1.3255, lr_0 = 1.3875e-04
Loss = 3.2080e-01, PNorm = 63.3163, GNorm = 1.4231, lr_0 = 1.3865e-04
Loss = 3.2014e-01, PNorm = 63.3172, GNorm = 1.4289, lr_0 = 1.3856e-04
Loss = 3.8001e-01, PNorm = 63.3170, GNorm = 1.4036, lr_0 = 1.3846e-04
Loss = 3.5468e-01, PNorm = 63.3185, GNorm = 1.4729, lr_0 = 1.3837e-04
Loss = 3.2743e-01, PNorm = 63.3191, GNorm = 1.2668, lr_0 = 1.3828e-04
Loss = 3.6025e-01, PNorm = 63.3192, GNorm = 1.7911, lr_0 = 1.3818e-04
Loss = 4.4232e-01, PNorm = 63.3194, GNorm = 1.2130, lr_0 = 1.3809e-04
Loss = 3.6873e-01, PNorm = 63.3210, GNorm = 1.6101, lr_0 = 1.3799e-04
Loss = 3.6774e-01, PNorm = 63.3219, GNorm = 1.7104, lr_0 = 1.3790e-04
Loss = 3.8394e-01, PNorm = 63.3242, GNorm = 1.1996, lr_0 = 1.3780e-04
Loss = 3.9817e-01, PNorm = 63.3248, GNorm = 1.6873, lr_0 = 1.3771e-04
Loss = 3.5194e-01, PNorm = 63.3264, GNorm = 1.4967, lr_0 = 1.3761e-04
Loss = 2.9038e-01, PNorm = 63.3287, GNorm = 1.4992, lr_0 = 1.3752e-04
Loss = 3.4564e-01, PNorm = 63.3310, GNorm = 1.7572, lr_0 = 1.3742e-04
Loss = 3.0362e-01, PNorm = 63.3328, GNorm = 1.6344, lr_0 = 1.3733e-04
Loss = 3.5155e-01, PNorm = 63.3333, GNorm = 1.1876, lr_0 = 1.3724e-04
Loss = 3.3328e-01, PNorm = 63.3339, GNorm = 1.3648, lr_0 = 1.3714e-04
Loss = 3.6328e-01, PNorm = 63.3347, GNorm = 1.8709, lr_0 = 1.3705e-04
Loss = 3.2750e-01, PNorm = 63.3362, GNorm = 1.0996, lr_0 = 1.3695e-04
Loss = 3.8051e-01, PNorm = 63.3370, GNorm = 1.3604, lr_0 = 1.3686e-04
Loss = 3.3534e-01, PNorm = 63.3368, GNorm = 1.4356, lr_0 = 1.3677e-04
Loss = 3.2332e-01, PNorm = 63.3407, GNorm = 1.2170, lr_0 = 1.3667e-04
Loss = 3.4398e-01, PNorm = 63.3421, GNorm = 1.1462, lr_0 = 1.3658e-04
Loss = 3.9138e-01, PNorm = 63.3430, GNorm = 2.0649, lr_0 = 1.3649e-04
Loss = 3.6801e-01, PNorm = 63.3448, GNorm = 1.7477, lr_0 = 1.3639e-04
Loss = 3.2592e-01, PNorm = 63.3473, GNorm = 1.4292, lr_0 = 1.3630e-04
Loss = 3.5752e-01, PNorm = 63.3481, GNorm = 1.1455, lr_0 = 1.3621e-04
Loss = 3.8295e-01, PNorm = 63.3485, GNorm = 1.7912, lr_0 = 1.3611e-04
Loss = 3.3813e-01, PNorm = 63.3508, GNorm = 1.8533, lr_0 = 1.3602e-04
Loss = 3.1986e-01, PNorm = 63.3522, GNorm = 1.4897, lr_0 = 1.3593e-04
Loss = 3.5956e-01, PNorm = 63.3524, GNorm = 1.3171, lr_0 = 1.3583e-04
Loss = 3.5781e-01, PNorm = 63.3516, GNorm = 1.5384, lr_0 = 1.3574e-04
Loss = 3.5321e-01, PNorm = 63.3548, GNorm = 2.1923, lr_0 = 1.3565e-04
Loss = 3.7466e-01, PNorm = 63.3572, GNorm = 1.6621, lr_0 = 1.3555e-04
Loss = 3.2864e-01, PNorm = 63.3588, GNorm = 1.1630, lr_0 = 1.3546e-04
Loss = 3.0196e-01, PNorm = 63.3604, GNorm = 1.4752, lr_0 = 1.3537e-04
Loss = 3.7162e-01, PNorm = 63.3606, GNorm = 1.5387, lr_0 = 1.3528e-04
Loss = 3.2456e-01, PNorm = 63.3617, GNorm = 1.8713, lr_0 = 1.3518e-04
Loss = 3.6639e-01, PNorm = 63.3641, GNorm = 1.4965, lr_0 = 1.3509e-04
Loss = 3.7918e-01, PNorm = 63.3649, GNorm = 1.5764, lr_0 = 1.3500e-04
Loss = 3.0850e-01, PNorm = 63.3670, GNorm = 1.7527, lr_0 = 1.3491e-04
Loss = 3.5284e-01, PNorm = 63.3673, GNorm = 1.6661, lr_0 = 1.3481e-04
Loss = 3.0993e-01, PNorm = 63.3667, GNorm = 1.6752, lr_0 = 1.3472e-04
Loss = 3.3755e-01, PNorm = 63.3690, GNorm = 1.2843, lr_0 = 1.3463e-04
Loss = 3.4771e-01, PNorm = 63.3698, GNorm = 2.0697, lr_0 = 1.3454e-04
Loss = 3.5843e-01, PNorm = 63.3719, GNorm = 2.0725, lr_0 = 1.3444e-04
Loss = 3.4764e-01, PNorm = 63.3717, GNorm = 1.4664, lr_0 = 1.3435e-04
Loss = 3.5841e-01, PNorm = 63.3712, GNorm = 1.9450, lr_0 = 1.3426e-04
Loss = 3.5119e-01, PNorm = 63.3744, GNorm = 1.2415, lr_0 = 1.3417e-04
Loss = 3.5669e-01, PNorm = 63.3741, GNorm = 1.3054, lr_0 = 1.3408e-04
Loss = 4.1312e-01, PNorm = 63.3744, GNorm = 1.6833, lr_0 = 1.3398e-04
Loss = 3.2298e-01, PNorm = 63.3748, GNorm = 1.2911, lr_0 = 1.3389e-04
Loss = 3.3397e-01, PNorm = 63.3771, GNorm = 1.2215, lr_0 = 1.3380e-04
Loss = 3.7208e-01, PNorm = 63.3795, GNorm = 1.6136, lr_0 = 1.3371e-04
Loss = 3.4584e-01, PNorm = 63.3798, GNorm = 1.3158, lr_0 = 1.3362e-04
Loss = 3.4383e-01, PNorm = 63.3812, GNorm = 1.4735, lr_0 = 1.3353e-04
Loss = 3.3436e-01, PNorm = 63.3820, GNorm = 2.4839, lr_0 = 1.3343e-04
Loss = 3.6181e-01, PNorm = 63.3829, GNorm = 1.9779, lr_0 = 1.3334e-04
Loss = 3.5398e-01, PNorm = 63.3859, GNorm = 1.4372, lr_0 = 1.3325e-04
Loss = 3.3304e-01, PNorm = 63.3865, GNorm = 1.3972, lr_0 = 1.3316e-04
Loss = 3.5212e-01, PNorm = 63.3882, GNorm = 1.2432, lr_0 = 1.3307e-04
Loss = 3.5968e-01, PNorm = 63.3887, GNorm = 1.7115, lr_0 = 1.3298e-04
Loss = 3.2350e-01, PNorm = 63.3903, GNorm = 1.9381, lr_0 = 1.3289e-04
Loss = 3.5863e-01, PNorm = 63.3912, GNorm = 1.2576, lr_0 = 1.3280e-04
Loss = 3.3164e-01, PNorm = 63.3947, GNorm = 1.1145, lr_0 = 1.3270e-04
Loss = 3.3822e-01, PNorm = 63.3963, GNorm = 1.4646, lr_0 = 1.3261e-04
Loss = 3.6831e-01, PNorm = 63.3952, GNorm = 1.2413, lr_0 = 1.3252e-04
Loss = 3.4350e-01, PNorm = 63.3960, GNorm = 1.9319, lr_0 = 1.3243e-04
Loss = 3.2712e-01, PNorm = 63.3954, GNorm = 1.2171, lr_0 = 1.3234e-04
Loss = 3.3296e-01, PNorm = 63.3969, GNorm = 1.2375, lr_0 = 1.3225e-04
Loss = 3.3135e-01, PNorm = 63.3974, GNorm = 1.6183, lr_0 = 1.3216e-04
Loss = 3.4034e-01, PNorm = 63.3974, GNorm = 1.7497, lr_0 = 1.3207e-04
Loss = 3.2037e-01, PNorm = 63.3987, GNorm = 1.0612, lr_0 = 1.3198e-04
Loss = 3.5411e-01, PNorm = 63.3990, GNorm = 2.0403, lr_0 = 1.3189e-04
Loss = 3.3597e-01, PNorm = 63.4003, GNorm = 1.5671, lr_0 = 1.3180e-04
Loss = 4.4477e-01, PNorm = 63.4020, GNorm = 1.5906, lr_0 = 1.3171e-04
Loss = 3.3550e-01, PNorm = 63.4042, GNorm = 1.2484, lr_0 = 1.3162e-04
Loss = 3.2144e-01, PNorm = 63.4050, GNorm = 1.1186, lr_0 = 1.3153e-04
Loss = 3.7559e-01, PNorm = 63.4051, GNorm = 1.4622, lr_0 = 1.3144e-04
Loss = 3.2381e-01, PNorm = 63.4055, GNorm = 1.3657, lr_0 = 1.3135e-04
Loss = 3.8515e-01, PNorm = 63.4050, GNorm = 1.3829, lr_0 = 1.3126e-04
Loss = 3.1535e-01, PNorm = 63.4073, GNorm = 1.5352, lr_0 = 1.3117e-04
Loss = 3.3796e-01, PNorm = 63.4079, GNorm = 2.0505, lr_0 = 1.3108e-04
Loss = 3.6880e-01, PNorm = 63.4087, GNorm = 1.3294, lr_0 = 1.3099e-04
Loss = 3.6848e-01, PNorm = 63.4104, GNorm = 1.8523, lr_0 = 1.3090e-04
Loss = 3.4749e-01, PNorm = 63.4127, GNorm = 1.2272, lr_0 = 1.3081e-04
Loss = 3.4364e-01, PNorm = 63.4132, GNorm = 1.8847, lr_0 = 1.3072e-04
Loss = 3.4793e-01, PNorm = 63.4123, GNorm = 2.1172, lr_0 = 1.3063e-04
Loss = 3.4475e-01, PNorm = 63.4148, GNorm = 1.5259, lr_0 = 1.3054e-04
Loss = 3.2390e-01, PNorm = 63.4192, GNorm = 1.2222, lr_0 = 1.3045e-04
Loss = 3.2060e-01, PNorm = 63.4220, GNorm = 1.2450, lr_0 = 1.3036e-04
Loss = 3.3987e-01, PNorm = 63.4241, GNorm = 1.1700, lr_0 = 1.3027e-04
Loss = 3.0006e-01, PNorm = 63.4242, GNorm = 1.2026, lr_0 = 1.3018e-04
Loss = 3.7028e-01, PNorm = 63.4246, GNorm = 1.2958, lr_0 = 1.3009e-04
Loss = 4.0534e-01, PNorm = 63.4267, GNorm = 2.0217, lr_0 = 1.3000e-04
Loss = 3.3579e-01, PNorm = 63.4291, GNorm = 1.4136, lr_0 = 1.2992e-04
Loss = 3.3119e-01, PNorm = 63.4284, GNorm = 1.1409, lr_0 = 1.2983e-04
Loss = 3.4254e-01, PNorm = 63.4304, GNorm = 1.5122, lr_0 = 1.2974e-04
Loss = 3.3757e-01, PNorm = 63.4335, GNorm = 1.5394, lr_0 = 1.2965e-04
Loss = 3.4589e-01, PNorm = 63.4347, GNorm = 2.1046, lr_0 = 1.2956e-04
Loss = 3.5183e-01, PNorm = 63.4354, GNorm = 1.4440, lr_0 = 1.2947e-04
Loss = 3.4391e-01, PNorm = 63.4362, GNorm = 2.4951, lr_0 = 1.2938e-04
Loss = 3.5168e-01, PNorm = 63.4382, GNorm = 1.3326, lr_0 = 1.2929e-04
Loss = 3.9247e-01, PNorm = 63.4408, GNorm = 2.1225, lr_0 = 1.2921e-04
Loss = 3.7555e-01, PNorm = 63.4434, GNorm = 1.6178, lr_0 = 1.2912e-04
Loss = 3.0476e-01, PNorm = 63.4459, GNorm = 1.0975, lr_0 = 1.2903e-04
Loss = 3.3280e-01, PNorm = 63.4475, GNorm = 1.2035, lr_0 = 1.2894e-04
Loss = 4.3341e-01, PNorm = 63.4475, GNorm = 1.2708, lr_0 = 1.2885e-04
Loss = 3.1058e-01, PNorm = 63.4501, GNorm = 1.3177, lr_0 = 1.2876e-04
Loss = 3.0318e-01, PNorm = 63.4523, GNorm = 1.1093, lr_0 = 1.2867e-04
Loss = 3.6602e-01, PNorm = 63.4531, GNorm = 2.1901, lr_0 = 1.2859e-04
Loss = 3.6316e-01, PNorm = 63.4531, GNorm = 1.4875, lr_0 = 1.2850e-04
Loss = 3.2042e-01, PNorm = 63.4535, GNorm = 1.2719, lr_0 = 1.2841e-04
Loss = 3.4195e-01, PNorm = 63.4536, GNorm = 1.8526, lr_0 = 1.2832e-04
Loss = 3.4730e-01, PNorm = 63.4536, GNorm = 1.4021, lr_0 = 1.2823e-04
Loss = 3.1540e-01, PNorm = 63.4528, GNorm = 2.0324, lr_0 = 1.2815e-04
Loss = 4.6008e-01, PNorm = 63.4509, GNorm = 1.4257, lr_0 = 1.2806e-04
Loss = 3.3471e-01, PNorm = 63.4552, GNorm = 1.2324, lr_0 = 1.2797e-04
Validation mae = 0.111370
Epoch 27
Loss = 3.9019e-01, PNorm = 63.4562, GNorm = 1.3191, lr_0 = 1.2788e-04
Loss = 3.5414e-01, PNorm = 63.4554, GNorm = 2.1366, lr_0 = 1.2780e-04
Loss = 3.8210e-01, PNorm = 63.4562, GNorm = 2.1478, lr_0 = 1.2771e-04
Loss = 3.3135e-01, PNorm = 63.4549, GNorm = 1.7689, lr_0 = 1.2762e-04
Loss = 3.1340e-01, PNorm = 63.4568, GNorm = 1.7296, lr_0 = 1.2753e-04
Loss = 3.9896e-01, PNorm = 63.4586, GNorm = 1.9145, lr_0 = 1.2745e-04
Loss = 3.3887e-01, PNorm = 63.4599, GNorm = 1.3047, lr_0 = 1.2736e-04
Loss = 3.4270e-01, PNorm = 63.4620, GNorm = 1.8739, lr_0 = 1.2727e-04
Loss = 3.8988e-01, PNorm = 63.4621, GNorm = 1.4109, lr_0 = 1.2718e-04
Loss = 3.5449e-01, PNorm = 63.4630, GNorm = 1.3437, lr_0 = 1.2710e-04
Loss = 3.6820e-01, PNorm = 63.4634, GNorm = 1.4468, lr_0 = 1.2701e-04
Loss = 3.3933e-01, PNorm = 63.4621, GNorm = 1.6755, lr_0 = 1.2692e-04
Loss = 3.3168e-01, PNorm = 63.4631, GNorm = 1.2089, lr_0 = 1.2684e-04
Loss = 3.4286e-01, PNorm = 63.4653, GNorm = 2.5904, lr_0 = 1.2675e-04
Loss = 3.5421e-01, PNorm = 63.4666, GNorm = 1.4628, lr_0 = 1.2666e-04
Loss = 3.8373e-01, PNorm = 63.4699, GNorm = 1.2802, lr_0 = 1.2658e-04
Loss = 3.3605e-01, PNorm = 63.4730, GNorm = 1.3870, lr_0 = 1.2649e-04
Loss = 3.4545e-01, PNorm = 63.4759, GNorm = 1.1874, lr_0 = 1.2640e-04
Loss = 3.3182e-01, PNorm = 63.4783, GNorm = 1.6357, lr_0 = 1.2632e-04
Loss = 3.6469e-01, PNorm = 63.4803, GNorm = 1.8329, lr_0 = 1.2623e-04
Loss = 3.9860e-01, PNorm = 63.4822, GNorm = 1.3396, lr_0 = 1.2614e-04
Loss = 3.6625e-01, PNorm = 63.4836, GNorm = 1.2563, lr_0 = 1.2606e-04
Loss = 3.5432e-01, PNorm = 63.4857, GNorm = 1.5757, lr_0 = 1.2597e-04
Loss = 3.4880e-01, PNorm = 63.4860, GNorm = 2.0616, lr_0 = 1.2588e-04
Loss = 3.1805e-01, PNorm = 63.4879, GNorm = 1.0852, lr_0 = 1.2580e-04
Loss = 3.2616e-01, PNorm = 63.4910, GNorm = 1.6955, lr_0 = 1.2571e-04
Loss = 3.3348e-01, PNorm = 63.4897, GNorm = 1.2876, lr_0 = 1.2563e-04
Loss = 3.5058e-01, PNorm = 63.4904, GNorm = 1.1566, lr_0 = 1.2554e-04
Loss = 2.9849e-01, PNorm = 63.4913, GNorm = 1.0716, lr_0 = 1.2545e-04
Loss = 3.2007e-01, PNorm = 63.4915, GNorm = 1.3738, lr_0 = 1.2537e-04
Loss = 3.2942e-01, PNorm = 63.4936, GNorm = 1.4667, lr_0 = 1.2528e-04
Loss = 3.4171e-01, PNorm = 63.4939, GNorm = 1.4204, lr_0 = 1.2520e-04
Loss = 3.1837e-01, PNorm = 63.4952, GNorm = 1.4951, lr_0 = 1.2511e-04
Loss = 3.1751e-01, PNorm = 63.4980, GNorm = 1.1679, lr_0 = 1.2502e-04
Loss = 3.2361e-01, PNorm = 63.4999, GNorm = 1.2964, lr_0 = 1.2494e-04
Loss = 3.1381e-01, PNorm = 63.4992, GNorm = 1.4137, lr_0 = 1.2485e-04
Loss = 3.3759e-01, PNorm = 63.4989, GNorm = 1.5000, lr_0 = 1.2477e-04
Loss = 3.5707e-01, PNorm = 63.4998, GNorm = 1.6059, lr_0 = 1.2468e-04
Loss = 3.8577e-01, PNorm = 63.4996, GNorm = 1.7372, lr_0 = 1.2460e-04
Loss = 4.4357e-01, PNorm = 63.5014, GNorm = 2.3270, lr_0 = 1.2451e-04
Loss = 3.4227e-01, PNorm = 63.5046, GNorm = 1.6688, lr_0 = 1.2443e-04
Loss = 3.5077e-01, PNorm = 63.5075, GNorm = 1.3643, lr_0 = 1.2434e-04
Loss = 3.5139e-01, PNorm = 63.5101, GNorm = 1.2894, lr_0 = 1.2426e-04
Loss = 3.0412e-01, PNorm = 63.5113, GNorm = 1.2913, lr_0 = 1.2417e-04
Loss = 3.4629e-01, PNorm = 63.5107, GNorm = 1.4427, lr_0 = 1.2409e-04
Loss = 3.2393e-01, PNorm = 63.5123, GNorm = 1.9446, lr_0 = 1.2400e-04
Loss = 3.3533e-01, PNorm = 63.5141, GNorm = 1.2136, lr_0 = 1.2392e-04
Loss = 3.2598e-01, PNorm = 63.5146, GNorm = 1.6001, lr_0 = 1.2383e-04
Loss = 3.7134e-01, PNorm = 63.5152, GNorm = 1.0263, lr_0 = 1.2375e-04
Loss = 3.3986e-01, PNorm = 63.5169, GNorm = 1.5417, lr_0 = 1.2366e-04
Loss = 3.3563e-01, PNorm = 63.5170, GNorm = 1.7326, lr_0 = 1.2358e-04
Loss = 3.6497e-01, PNorm = 63.5170, GNorm = 1.3338, lr_0 = 1.2349e-04
Loss = 3.1231e-01, PNorm = 63.5198, GNorm = 1.3966, lr_0 = 1.2341e-04
Loss = 3.6294e-01, PNorm = 63.5218, GNorm = 1.5756, lr_0 = 1.2332e-04
Loss = 3.5888e-01, PNorm = 63.5235, GNorm = 1.0260, lr_0 = 1.2324e-04
Loss = 3.5870e-01, PNorm = 63.5255, GNorm = 1.2192, lr_0 = 1.2315e-04
Loss = 3.9913e-01, PNorm = 63.5274, GNorm = 1.6253, lr_0 = 1.2307e-04
Loss = 2.9994e-01, PNorm = 63.5275, GNorm = 1.4718, lr_0 = 1.2298e-04
Loss = 3.6201e-01, PNorm = 63.5269, GNorm = 1.4687, lr_0 = 1.2290e-04
Loss = 3.4681e-01, PNorm = 63.5263, GNorm = 1.4246, lr_0 = 1.2282e-04
Loss = 3.2567e-01, PNorm = 63.5261, GNorm = 1.4125, lr_0 = 1.2273e-04
Loss = 3.3083e-01, PNorm = 63.5277, GNorm = 1.6000, lr_0 = 1.2265e-04
Loss = 3.3925e-01, PNorm = 63.5302, GNorm = 1.6092, lr_0 = 1.2256e-04
Loss = 3.2886e-01, PNorm = 63.5315, GNorm = 1.2274, lr_0 = 1.2248e-04
Loss = 3.4356e-01, PNorm = 63.5322, GNorm = 1.4531, lr_0 = 1.2240e-04
Loss = 3.2234e-01, PNorm = 63.5328, GNorm = 1.6775, lr_0 = 1.2231e-04
Loss = 3.7577e-01, PNorm = 63.5339, GNorm = 1.1633, lr_0 = 1.2223e-04
Loss = 3.4571e-01, PNorm = 63.5349, GNorm = 1.9753, lr_0 = 1.2214e-04
Loss = 3.5452e-01, PNorm = 63.5355, GNorm = 1.2200, lr_0 = 1.2206e-04
Loss = 3.9512e-01, PNorm = 63.5361, GNorm = 1.5230, lr_0 = 1.2198e-04
Loss = 3.7973e-01, PNorm = 63.5388, GNorm = 1.6214, lr_0 = 1.2189e-04
Loss = 3.4146e-01, PNorm = 63.5406, GNorm = 1.2198, lr_0 = 1.2181e-04
Loss = 3.5319e-01, PNorm = 63.5406, GNorm = 1.3626, lr_0 = 1.2173e-04
Loss = 3.3641e-01, PNorm = 63.5405, GNorm = 1.4550, lr_0 = 1.2164e-04
Loss = 3.2363e-01, PNorm = 63.5414, GNorm = 1.2168, lr_0 = 1.2156e-04
Loss = 3.6078e-01, PNorm = 63.5438, GNorm = 1.3719, lr_0 = 1.2148e-04
Loss = 3.5607e-01, PNorm = 63.5455, GNorm = 1.5297, lr_0 = 1.2139e-04
Loss = 3.8252e-01, PNorm = 63.5452, GNorm = 1.3269, lr_0 = 1.2131e-04
Loss = 3.0406e-01, PNorm = 63.5465, GNorm = 1.2745, lr_0 = 1.2123e-04
Loss = 3.6159e-01, PNorm = 63.5478, GNorm = 1.9397, lr_0 = 1.2114e-04
Loss = 3.4844e-01, PNorm = 63.5486, GNorm = 1.8417, lr_0 = 1.2106e-04
Loss = 3.7092e-01, PNorm = 63.5486, GNorm = 1.3081, lr_0 = 1.2098e-04
Loss = 3.3805e-01, PNorm = 63.5510, GNorm = 1.9800, lr_0 = 1.2090e-04
Loss = 3.5464e-01, PNorm = 63.5514, GNorm = 1.4319, lr_0 = 1.2081e-04
Loss = 3.1503e-01, PNorm = 63.5513, GNorm = 1.5842, lr_0 = 1.2073e-04
Loss = 3.0515e-01, PNorm = 63.5534, GNorm = 1.9070, lr_0 = 1.2065e-04
Loss = 3.0400e-01, PNorm = 63.5544, GNorm = 1.2239, lr_0 = 1.2056e-04
Loss = 3.7877e-01, PNorm = 63.5547, GNorm = 2.5569, lr_0 = 1.2048e-04
Loss = 3.5574e-01, PNorm = 63.5555, GNorm = 1.5267, lr_0 = 1.2040e-04
Loss = 3.4519e-01, PNorm = 63.5584, GNorm = 1.7399, lr_0 = 1.2032e-04
Loss = 3.6097e-01, PNorm = 63.5609, GNorm = 1.5529, lr_0 = 1.2023e-04
Loss = 3.6147e-01, PNorm = 63.5600, GNorm = 1.4103, lr_0 = 1.2015e-04
Loss = 3.7699e-01, PNorm = 63.5608, GNorm = 1.5171, lr_0 = 1.2007e-04
Loss = 3.8489e-01, PNorm = 63.5627, GNorm = 1.2511, lr_0 = 1.1999e-04
Loss = 3.5154e-01, PNorm = 63.5619, GNorm = 1.3218, lr_0 = 1.1991e-04
Loss = 3.4370e-01, PNorm = 63.5610, GNorm = 1.5854, lr_0 = 1.1982e-04
Loss = 3.5864e-01, PNorm = 63.5638, GNorm = 1.3933, lr_0 = 1.1974e-04
Loss = 3.5436e-01, PNorm = 63.5658, GNorm = 1.0402, lr_0 = 1.1966e-04
Loss = 2.9874e-01, PNorm = 63.5681, GNorm = 1.6276, lr_0 = 1.1958e-04
Loss = 3.7891e-01, PNorm = 63.5684, GNorm = 1.7247, lr_0 = 1.1950e-04
Loss = 3.3992e-01, PNorm = 63.5681, GNorm = 1.3304, lr_0 = 1.1941e-04
Loss = 3.3449e-01, PNorm = 63.5687, GNorm = 1.3649, lr_0 = 1.1933e-04
Loss = 3.3156e-01, PNorm = 63.5697, GNorm = 1.9608, lr_0 = 1.1925e-04
Loss = 3.1647e-01, PNorm = 63.5710, GNorm = 1.4758, lr_0 = 1.1917e-04
Loss = 3.3171e-01, PNorm = 63.5712, GNorm = 1.2806, lr_0 = 1.1909e-04
Loss = 3.2111e-01, PNorm = 63.5726, GNorm = 1.5450, lr_0 = 1.1901e-04
Loss = 3.5921e-01, PNorm = 63.5750, GNorm = 1.2303, lr_0 = 1.1892e-04
Loss = 3.7460e-01, PNorm = 63.5773, GNorm = 1.9628, lr_0 = 1.1884e-04
Loss = 3.7126e-01, PNorm = 63.5773, GNorm = 1.7066, lr_0 = 1.1876e-04
Loss = 3.7850e-01, PNorm = 63.5792, GNorm = 1.3104, lr_0 = 1.1868e-04
Loss = 3.1056e-01, PNorm = 63.5809, GNorm = 1.5451, lr_0 = 1.1860e-04
Loss = 3.6926e-01, PNorm = 63.5791, GNorm = 1.4037, lr_0 = 1.1852e-04
Loss = 3.4878e-01, PNorm = 63.5786, GNorm = 1.6337, lr_0 = 1.1844e-04
Loss = 3.4556e-01, PNorm = 63.5795, GNorm = 1.4577, lr_0 = 1.1835e-04
Loss = 3.4253e-01, PNorm = 63.5826, GNorm = 1.4169, lr_0 = 1.1827e-04
Loss = 3.3076e-01, PNorm = 63.5844, GNorm = 1.4225, lr_0 = 1.1819e-04
Loss = 3.5422e-01, PNorm = 63.5859, GNorm = 1.3658, lr_0 = 1.1811e-04
Loss = 3.5388e-01, PNorm = 63.5859, GNorm = 1.6248, lr_0 = 1.1803e-04
Loss = 3.3963e-01, PNorm = 63.5868, GNorm = 1.1574, lr_0 = 1.1795e-04
Loss = 3.1341e-01, PNorm = 63.5887, GNorm = 1.9927, lr_0 = 1.1787e-04
Validation mae = 0.110980
Epoch 28
Loss = 3.9266e-01, PNorm = 63.5897, GNorm = 1.4728, lr_0 = 1.1779e-04
Loss = 3.3050e-01, PNorm = 63.5910, GNorm = 1.2312, lr_0 = 1.1771e-04
Loss = 2.8640e-01, PNorm = 63.5913, GNorm = 1.1441, lr_0 = 1.1763e-04
Loss = 3.4197e-01, PNorm = 63.5912, GNorm = 1.1716, lr_0 = 1.1755e-04
Loss = 3.6395e-01, PNorm = 63.5905, GNorm = 1.9890, lr_0 = 1.1747e-04
Loss = 3.5847e-01, PNorm = 63.5910, GNorm = 1.6340, lr_0 = 1.1739e-04
Loss = 3.2801e-01, PNorm = 63.5916, GNorm = 1.3733, lr_0 = 1.1730e-04
Loss = 3.3436e-01, PNorm = 63.5939, GNorm = 1.3220, lr_0 = 1.1722e-04
Loss = 3.1750e-01, PNorm = 63.5974, GNorm = 2.2916, lr_0 = 1.1714e-04
Loss = 3.3677e-01, PNorm = 63.5989, GNorm = 1.1437, lr_0 = 1.1706e-04
Loss = 3.6395e-01, PNorm = 63.6006, GNorm = 1.9842, lr_0 = 1.1698e-04
Loss = 3.2971e-01, PNorm = 63.6018, GNorm = 1.3712, lr_0 = 1.1690e-04
Loss = 3.3176e-01, PNorm = 63.6021, GNorm = 1.5332, lr_0 = 1.1682e-04
Loss = 3.6558e-01, PNorm = 63.6035, GNorm = 0.9413, lr_0 = 1.1674e-04
Loss = 3.1657e-01, PNorm = 63.6069, GNorm = 2.0972, lr_0 = 1.1666e-04
Loss = 3.0086e-01, PNorm = 63.6098, GNorm = 1.2834, lr_0 = 1.1658e-04
Loss = 3.1579e-01, PNorm = 63.6109, GNorm = 1.3691, lr_0 = 1.1650e-04
Loss = 3.2012e-01, PNorm = 63.6107, GNorm = 1.9402, lr_0 = 1.1642e-04
Loss = 2.8767e-01, PNorm = 63.6106, GNorm = 1.7376, lr_0 = 1.1634e-04
Loss = 4.0311e-01, PNorm = 63.6128, GNorm = 1.7790, lr_0 = 1.1626e-04
Loss = 3.2935e-01, PNorm = 63.6143, GNorm = 1.2449, lr_0 = 1.1618e-04
Loss = 3.6425e-01, PNorm = 63.6134, GNorm = 1.6268, lr_0 = 1.1611e-04
Loss = 3.3376e-01, PNorm = 63.6142, GNorm = 1.7645, lr_0 = 1.1603e-04
Loss = 3.6737e-01, PNorm = 63.6165, GNorm = 1.3130, lr_0 = 1.1595e-04
Loss = 3.0246e-01, PNorm = 63.6171, GNorm = 1.2540, lr_0 = 1.1587e-04
Loss = 3.4384e-01, PNorm = 63.6173, GNorm = 1.4276, lr_0 = 1.1579e-04
Loss = 3.4065e-01, PNorm = 63.6184, GNorm = 1.8588, lr_0 = 1.1571e-04
Loss = 2.9793e-01, PNorm = 63.6207, GNorm = 1.4026, lr_0 = 1.1563e-04
Loss = 3.8158e-01, PNorm = 63.6213, GNorm = 1.5252, lr_0 = 1.1555e-04
Loss = 3.0796e-01, PNorm = 63.6239, GNorm = 1.6012, lr_0 = 1.1547e-04
Loss = 3.0120e-01, PNorm = 63.6256, GNorm = 1.6295, lr_0 = 1.1539e-04
Loss = 3.6538e-01, PNorm = 63.6261, GNorm = 1.4195, lr_0 = 1.1531e-04
Loss = 3.4653e-01, PNorm = 63.6262, GNorm = 1.4594, lr_0 = 1.1523e-04
Loss = 3.5165e-01, PNorm = 63.6271, GNorm = 2.1380, lr_0 = 1.1515e-04
Loss = 3.3148e-01, PNorm = 63.6282, GNorm = 1.3176, lr_0 = 1.1508e-04
Loss = 3.0866e-01, PNorm = 63.6297, GNorm = 1.5177, lr_0 = 1.1500e-04
Loss = 3.6789e-01, PNorm = 63.6314, GNorm = 1.4923, lr_0 = 1.1492e-04
Loss = 3.2602e-01, PNorm = 63.6319, GNorm = 1.3815, lr_0 = 1.1484e-04
Loss = 3.0016e-01, PNorm = 63.6320, GNorm = 1.4393, lr_0 = 1.1476e-04
Loss = 2.9349e-01, PNorm = 63.6322, GNorm = 1.2784, lr_0 = 1.1468e-04
Loss = 3.8373e-01, PNorm = 63.6335, GNorm = 1.2874, lr_0 = 1.1460e-04
Loss = 3.3736e-01, PNorm = 63.6346, GNorm = 1.7354, lr_0 = 1.1452e-04
Loss = 3.1490e-01, PNorm = 63.6360, GNorm = 1.5777, lr_0 = 1.1445e-04
Loss = 3.2094e-01, PNorm = 63.6359, GNorm = 1.3838, lr_0 = 1.1437e-04
Loss = 3.3463e-01, PNorm = 63.6381, GNorm = 1.6753, lr_0 = 1.1429e-04
Loss = 3.2343e-01, PNorm = 63.6402, GNorm = 1.7111, lr_0 = 1.1421e-04
Loss = 3.4652e-01, PNorm = 63.6397, GNorm = 1.2278, lr_0 = 1.1413e-04
Loss = 3.9016e-01, PNorm = 63.6393, GNorm = 1.6763, lr_0 = 1.1405e-04
Loss = 3.3172e-01, PNorm = 63.6400, GNorm = 1.2890, lr_0 = 1.1398e-04
Loss = 3.6683e-01, PNorm = 63.6402, GNorm = 1.8671, lr_0 = 1.1390e-04
Loss = 3.3508e-01, PNorm = 63.6412, GNorm = 1.8121, lr_0 = 1.1382e-04
Loss = 3.4654e-01, PNorm = 63.6408, GNorm = 1.3459, lr_0 = 1.1374e-04
Loss = 3.2801e-01, PNorm = 63.6412, GNorm = 1.7654, lr_0 = 1.1366e-04
Loss = 3.3281e-01, PNorm = 63.6424, GNorm = 1.5470, lr_0 = 1.1359e-04
Loss = 3.5289e-01, PNorm = 63.6432, GNorm = 1.4723, lr_0 = 1.1351e-04
Loss = 3.2975e-01, PNorm = 63.6435, GNorm = 1.3645, lr_0 = 1.1343e-04
Loss = 3.7804e-01, PNorm = 63.6438, GNorm = 2.8771, lr_0 = 1.1335e-04
Loss = 3.4485e-01, PNorm = 63.6439, GNorm = 1.4890, lr_0 = 1.1328e-04
Loss = 3.4301e-01, PNorm = 63.6457, GNorm = 1.1580, lr_0 = 1.1320e-04
Loss = 3.1839e-01, PNorm = 63.6463, GNorm = 1.9385, lr_0 = 1.1312e-04
Loss = 3.7909e-01, PNorm = 63.6447, GNorm = 1.7849, lr_0 = 1.1304e-04
Loss = 3.4189e-01, PNorm = 63.6454, GNorm = 1.5008, lr_0 = 1.1297e-04
Loss = 3.5119e-01, PNorm = 63.6471, GNorm = 1.2610, lr_0 = 1.1289e-04
Loss = 2.7484e-01, PNorm = 63.6476, GNorm = 1.1744, lr_0 = 1.1281e-04
Loss = 3.5022e-01, PNorm = 63.6495, GNorm = 1.7413, lr_0 = 1.1273e-04
Loss = 3.5829e-01, PNorm = 63.6513, GNorm = 1.4019, lr_0 = 1.1266e-04
Loss = 3.2867e-01, PNorm = 63.6537, GNorm = 1.5789, lr_0 = 1.1258e-04
Loss = 3.3926e-01, PNorm = 63.6562, GNorm = 1.8267, lr_0 = 1.1250e-04
Loss = 3.6992e-01, PNorm = 63.6575, GNorm = 1.1677, lr_0 = 1.1243e-04
Loss = 4.0730e-01, PNorm = 63.6571, GNorm = 1.7504, lr_0 = 1.1235e-04
Loss = 3.0648e-01, PNorm = 63.6594, GNorm = 1.4383, lr_0 = 1.1227e-04
Loss = 3.6349e-01, PNorm = 63.6616, GNorm = 1.1441, lr_0 = 1.1219e-04
Loss = 3.6907e-01, PNorm = 63.6620, GNorm = 2.1525, lr_0 = 1.1212e-04
Loss = 3.3231e-01, PNorm = 63.6628, GNorm = 1.7299, lr_0 = 1.1204e-04
Loss = 3.4896e-01, PNorm = 63.6654, GNorm = 1.7973, lr_0 = 1.1196e-04
Loss = 3.7195e-01, PNorm = 63.6658, GNorm = 1.2659, lr_0 = 1.1189e-04
Loss = 3.7149e-01, PNorm = 63.6653, GNorm = 1.6187, lr_0 = 1.1181e-04
Loss = 3.4655e-01, PNorm = 63.6666, GNorm = 2.5798, lr_0 = 1.1173e-04
Loss = 3.2450e-01, PNorm = 63.6688, GNorm = 1.6065, lr_0 = 1.1166e-04
Loss = 3.3385e-01, PNorm = 63.6695, GNorm = 2.5538, lr_0 = 1.1158e-04
Loss = 3.1064e-01, PNorm = 63.6710, GNorm = 1.8037, lr_0 = 1.1150e-04
Loss = 3.8332e-01, PNorm = 63.6708, GNorm = 1.5535, lr_0 = 1.1143e-04
Loss = 3.2326e-01, PNorm = 63.6698, GNorm = 1.3939, lr_0 = 1.1135e-04
Loss = 3.6014e-01, PNorm = 63.6701, GNorm = 1.8285, lr_0 = 1.1128e-04
Loss = 3.4039e-01, PNorm = 63.6718, GNorm = 1.3682, lr_0 = 1.1120e-04
Loss = 3.5211e-01, PNorm = 63.6744, GNorm = 1.2752, lr_0 = 1.1112e-04
Loss = 3.6353e-01, PNorm = 63.6741, GNorm = 1.6437, lr_0 = 1.1105e-04
Loss = 3.2637e-01, PNorm = 63.6746, GNorm = 1.3130, lr_0 = 1.1097e-04
Loss = 3.1110e-01, PNorm = 63.6758, GNorm = 1.4837, lr_0 = 1.1089e-04
Loss = 2.7370e-01, PNorm = 63.6765, GNorm = 1.1704, lr_0 = 1.1082e-04
Loss = 3.7054e-01, PNorm = 63.6759, GNorm = 1.1945, lr_0 = 1.1074e-04
Loss = 3.2279e-01, PNorm = 63.6759, GNorm = 1.7435, lr_0 = 1.1067e-04
Loss = 3.5182e-01, PNorm = 63.6778, GNorm = 1.1999, lr_0 = 1.1059e-04
Loss = 4.1291e-01, PNorm = 63.6807, GNorm = 1.6863, lr_0 = 1.1052e-04
Loss = 3.3351e-01, PNorm = 63.6825, GNorm = 1.2126, lr_0 = 1.1044e-04
Loss = 3.2634e-01, PNorm = 63.6837, GNorm = 1.5163, lr_0 = 1.1036e-04
Loss = 3.1762e-01, PNorm = 63.6853, GNorm = 1.2041, lr_0 = 1.1029e-04
Loss = 3.4657e-01, PNorm = 63.6874, GNorm = 1.6640, lr_0 = 1.1021e-04
Loss = 3.0492e-01, PNorm = 63.6888, GNorm = 0.9442, lr_0 = 1.1014e-04
Loss = 3.4241e-01, PNorm = 63.6890, GNorm = 1.1575, lr_0 = 1.1006e-04
Loss = 3.6306e-01, PNorm = 63.6899, GNorm = 1.2613, lr_0 = 1.0999e-04
Loss = 3.6092e-01, PNorm = 63.6920, GNorm = 1.1626, lr_0 = 1.0991e-04
Loss = 3.8758e-01, PNorm = 63.6937, GNorm = 4.4388, lr_0 = 1.0984e-04
Loss = 3.8772e-01, PNorm = 63.6940, GNorm = 1.3960, lr_0 = 1.0976e-04
Loss = 3.9868e-01, PNorm = 63.6947, GNorm = 1.4493, lr_0 = 1.0969e-04
Loss = 3.7398e-01, PNorm = 63.6960, GNorm = 1.6019, lr_0 = 1.0961e-04
Loss = 3.3356e-01, PNorm = 63.6966, GNorm = 1.5593, lr_0 = 1.0954e-04
Loss = 3.3781e-01, PNorm = 63.6979, GNorm = 1.3096, lr_0 = 1.0946e-04
Loss = 3.7869e-01, PNorm = 63.7004, GNorm = 1.6786, lr_0 = 1.0939e-04
Loss = 3.1326e-01, PNorm = 63.7019, GNorm = 1.9341, lr_0 = 1.0931e-04
Loss = 3.3156e-01, PNorm = 63.7044, GNorm = 1.5404, lr_0 = 1.0924e-04
Loss = 3.5444e-01, PNorm = 63.7044, GNorm = 1.4534, lr_0 = 1.0916e-04
Loss = 3.5113e-01, PNorm = 63.7049, GNorm = 1.2011, lr_0 = 1.0909e-04
Loss = 3.4539e-01, PNorm = 63.7062, GNorm = 2.1250, lr_0 = 1.0901e-04
Loss = 3.4546e-01, PNorm = 63.7077, GNorm = 1.2267, lr_0 = 1.0894e-04
Loss = 3.5277e-01, PNorm = 63.7090, GNorm = 1.6521, lr_0 = 1.0886e-04
Loss = 3.4649e-01, PNorm = 63.7098, GNorm = 1.9781, lr_0 = 1.0879e-04
Loss = 3.6844e-01, PNorm = 63.7091, GNorm = 1.5813, lr_0 = 1.0871e-04
Loss = 3.6302e-01, PNorm = 63.7091, GNorm = 1.4551, lr_0 = 1.0864e-04
Loss = 3.7646e-01, PNorm = 63.7091, GNorm = 1.4599, lr_0 = 1.0856e-04
Validation mae = 0.110796
Epoch 29
Loss = 3.3620e-01, PNorm = 63.7086, GNorm = 1.3430, lr_0 = 1.0849e-04
Loss = 3.5760e-01, PNorm = 63.7092, GNorm = 1.3554, lr_0 = 1.0841e-04
Loss = 3.2417e-01, PNorm = 63.7111, GNorm = 1.6419, lr_0 = 1.0834e-04
Loss = 3.4765e-01, PNorm = 63.7123, GNorm = 1.8850, lr_0 = 1.0827e-04
Loss = 3.9424e-01, PNorm = 63.7113, GNorm = 2.1901, lr_0 = 1.0819e-04
Loss = 3.3152e-01, PNorm = 63.7125, GNorm = 1.6731, lr_0 = 1.0812e-04
Loss = 3.2885e-01, PNorm = 63.7153, GNorm = 1.0974, lr_0 = 1.0804e-04
Loss = 3.6532e-01, PNorm = 63.7176, GNorm = 2.4359, lr_0 = 1.0797e-04
Loss = 3.3720e-01, PNorm = 63.7188, GNorm = 1.5942, lr_0 = 1.0790e-04
Loss = 3.5431e-01, PNorm = 63.7211, GNorm = 1.9872, lr_0 = 1.0782e-04
Loss = 3.0805e-01, PNorm = 63.7201, GNorm = 1.2949, lr_0 = 1.0775e-04
Loss = 3.5568e-01, PNorm = 63.7188, GNorm = 1.2412, lr_0 = 1.0767e-04
Loss = 3.2550e-01, PNorm = 63.7196, GNorm = 2.1392, lr_0 = 1.0760e-04
Loss = 3.0419e-01, PNorm = 63.7212, GNorm = 0.9651, lr_0 = 1.0753e-04
Loss = 3.9388e-01, PNorm = 63.7227, GNorm = 1.5005, lr_0 = 1.0745e-04
Loss = 2.8808e-01, PNorm = 63.7241, GNorm = 1.2672, lr_0 = 1.0738e-04
Loss = 3.3650e-01, PNorm = 63.7239, GNorm = 2.2552, lr_0 = 1.0731e-04
Loss = 3.2327e-01, PNorm = 63.7254, GNorm = 1.6298, lr_0 = 1.0723e-04
Loss = 3.2087e-01, PNorm = 63.7284, GNorm = 1.6880, lr_0 = 1.0716e-04
Loss = 4.4329e-01, PNorm = 63.7294, GNorm = 1.3598, lr_0 = 1.0709e-04
Loss = 3.2720e-01, PNorm = 63.7301, GNorm = 1.6546, lr_0 = 1.0701e-04
Loss = 3.3257e-01, PNorm = 63.7330, GNorm = 1.9796, lr_0 = 1.0694e-04
Loss = 2.9038e-01, PNorm = 63.7351, GNorm = 1.3622, lr_0 = 1.0687e-04
Loss = 3.0557e-01, PNorm = 63.7350, GNorm = 1.1327, lr_0 = 1.0679e-04
Loss = 3.3920e-01, PNorm = 63.7349, GNorm = 1.2454, lr_0 = 1.0672e-04
Loss = 3.9596e-01, PNorm = 63.7362, GNorm = 1.8817, lr_0 = 1.0665e-04
Loss = 3.7245e-01, PNorm = 63.7373, GNorm = 1.6546, lr_0 = 1.0657e-04
Loss = 3.9877e-01, PNorm = 63.7384, GNorm = 2.0748, lr_0 = 1.0650e-04
Loss = 4.1464e-01, PNorm = 63.7384, GNorm = 1.7407, lr_0 = 1.0643e-04
Loss = 3.6739e-01, PNorm = 63.7386, GNorm = 1.2347, lr_0 = 1.0635e-04
Loss = 3.6416e-01, PNorm = 63.7389, GNorm = 1.2345, lr_0 = 1.0628e-04
Loss = 3.4867e-01, PNorm = 63.7412, GNorm = 1.5613, lr_0 = 1.0621e-04
Loss = 3.3079e-01, PNorm = 63.7428, GNorm = 1.2059, lr_0 = 1.0614e-04
Loss = 2.9245e-01, PNorm = 63.7442, GNorm = 1.1005, lr_0 = 1.0606e-04
Loss = 4.2482e-01, PNorm = 63.7442, GNorm = 1.8937, lr_0 = 1.0599e-04
Loss = 3.1832e-01, PNorm = 63.7450, GNorm = 1.5040, lr_0 = 1.0592e-04
Loss = 3.5269e-01, PNorm = 63.7456, GNorm = 1.9678, lr_0 = 1.0585e-04
Loss = 3.8181e-01, PNorm = 63.7450, GNorm = 1.6695, lr_0 = 1.0577e-04
Loss = 2.8479e-01, PNorm = 63.7451, GNorm = 1.6116, lr_0 = 1.0570e-04
Loss = 3.6523e-01, PNorm = 63.7450, GNorm = 1.6595, lr_0 = 1.0563e-04
Loss = 2.9261e-01, PNorm = 63.7455, GNorm = 1.1731, lr_0 = 1.0556e-04
Loss = 3.5557e-01, PNorm = 63.7465, GNorm = 1.5662, lr_0 = 1.0548e-04
Loss = 3.1640e-01, PNorm = 63.7479, GNorm = 1.9168, lr_0 = 1.0541e-04
Loss = 3.9119e-01, PNorm = 63.7485, GNorm = 1.0837, lr_0 = 1.0534e-04
Loss = 3.5692e-01, PNorm = 63.7503, GNorm = 1.3796, lr_0 = 1.0527e-04
Loss = 3.3445e-01, PNorm = 63.7504, GNorm = 1.2693, lr_0 = 1.0519e-04
Loss = 3.8029e-01, PNorm = 63.7516, GNorm = 1.4771, lr_0 = 1.0512e-04
Loss = 3.3717e-01, PNorm = 63.7522, GNorm = 1.3639, lr_0 = 1.0505e-04
Loss = 3.6006e-01, PNorm = 63.7535, GNorm = 1.3657, lr_0 = 1.0498e-04
Loss = 3.6588e-01, PNorm = 63.7543, GNorm = 1.3546, lr_0 = 1.0491e-04
Loss = 4.2272e-01, PNorm = 63.7557, GNorm = 1.4551, lr_0 = 1.0483e-04
Loss = 3.4728e-01, PNorm = 63.7577, GNorm = 1.4774, lr_0 = 1.0476e-04
Loss = 3.1589e-01, PNorm = 63.7596, GNorm = 2.3937, lr_0 = 1.0469e-04
Loss = 3.3611e-01, PNorm = 63.7587, GNorm = 1.3266, lr_0 = 1.0462e-04
Loss = 3.3070e-01, PNorm = 63.7595, GNorm = 1.8431, lr_0 = 1.0455e-04
Loss = 2.8480e-01, PNorm = 63.7601, GNorm = 1.0141, lr_0 = 1.0448e-04
Loss = 3.4240e-01, PNorm = 63.7616, GNorm = 1.6257, lr_0 = 1.0440e-04
Loss = 3.3102e-01, PNorm = 63.7634, GNorm = 1.7000, lr_0 = 1.0433e-04
Loss = 3.5972e-01, PNorm = 63.7635, GNorm = 1.2905, lr_0 = 1.0426e-04
Loss = 3.3296e-01, PNorm = 63.7633, GNorm = 1.4590, lr_0 = 1.0419e-04
Loss = 3.2622e-01, PNorm = 63.7634, GNorm = 1.1500, lr_0 = 1.0412e-04
Loss = 3.2502e-01, PNorm = 63.7645, GNorm = 1.4514, lr_0 = 1.0405e-04
Loss = 3.1487e-01, PNorm = 63.7658, GNorm = 1.3517, lr_0 = 1.0398e-04
Loss = 2.9754e-01, PNorm = 63.7672, GNorm = 1.7008, lr_0 = 1.0391e-04
Loss = 3.7472e-01, PNorm = 63.7672, GNorm = 1.4199, lr_0 = 1.0383e-04
Loss = 3.1637e-01, PNorm = 63.7676, GNorm = 1.5478, lr_0 = 1.0376e-04
Loss = 3.7217e-01, PNorm = 63.7694, GNorm = 1.8929, lr_0 = 1.0369e-04
Loss = 3.5010e-01, PNorm = 63.7704, GNorm = 1.4921, lr_0 = 1.0362e-04
Loss = 3.7951e-01, PNorm = 63.7708, GNorm = 1.8031, lr_0 = 1.0355e-04
Loss = 3.6315e-01, PNorm = 63.7730, GNorm = 1.3886, lr_0 = 1.0348e-04
Loss = 3.0200e-01, PNorm = 63.7742, GNorm = 1.2345, lr_0 = 1.0341e-04
Loss = 3.7562e-01, PNorm = 63.7739, GNorm = 1.8187, lr_0 = 1.0334e-04
Loss = 3.5699e-01, PNorm = 63.7735, GNorm = 1.5795, lr_0 = 1.0327e-04
Loss = 3.2963e-01, PNorm = 63.7753, GNorm = 1.6669, lr_0 = 1.0320e-04
Loss = 3.5411e-01, PNorm = 63.7764, GNorm = 1.3689, lr_0 = 1.0312e-04
Loss = 3.5127e-01, PNorm = 63.7762, GNorm = 1.5490, lr_0 = 1.0305e-04
Loss = 3.9735e-01, PNorm = 63.7757, GNorm = 1.9562, lr_0 = 1.0298e-04
Loss = 3.3678e-01, PNorm = 63.7773, GNorm = 1.8185, lr_0 = 1.0291e-04
Loss = 3.7005e-01, PNorm = 63.7785, GNorm = 1.2299, lr_0 = 1.0284e-04
Loss = 3.1694e-01, PNorm = 63.7796, GNorm = 2.1626, lr_0 = 1.0277e-04
Loss = 3.2238e-01, PNorm = 63.7803, GNorm = 1.3538, lr_0 = 1.0270e-04
Loss = 3.1543e-01, PNorm = 63.7828, GNorm = 1.4404, lr_0 = 1.0263e-04
Loss = 3.1728e-01, PNorm = 63.7828, GNorm = 2.2209, lr_0 = 1.0256e-04
Loss = 3.4088e-01, PNorm = 63.7820, GNorm = 1.3064, lr_0 = 1.0249e-04
Loss = 3.2936e-01, PNorm = 63.7819, GNorm = 1.3901, lr_0 = 1.0242e-04
Loss = 3.1338e-01, PNorm = 63.7816, GNorm = 1.2381, lr_0 = 1.0235e-04
Loss = 3.6044e-01, PNorm = 63.7833, GNorm = 1.5107, lr_0 = 1.0228e-04
Loss = 3.4123e-01, PNorm = 63.7846, GNorm = 1.5491, lr_0 = 1.0221e-04
Loss = 2.9849e-01, PNorm = 63.7868, GNorm = 1.8945, lr_0 = 1.0214e-04
Loss = 3.2833e-01, PNorm = 63.7885, GNorm = 1.7707, lr_0 = 1.0207e-04
Loss = 3.6671e-01, PNorm = 63.7884, GNorm = 1.6440, lr_0 = 1.0200e-04
Loss = 3.0913e-01, PNorm = 63.7885, GNorm = 1.4765, lr_0 = 1.0193e-04
Loss = 4.0770e-01, PNorm = 63.7891, GNorm = 1.7282, lr_0 = 1.0186e-04
Loss = 3.2076e-01, PNorm = 63.7900, GNorm = 1.4306, lr_0 = 1.0179e-04
Loss = 3.2251e-01, PNorm = 63.7910, GNorm = 1.3693, lr_0 = 1.0172e-04
Loss = 3.0985e-01, PNorm = 63.7929, GNorm = 1.6283, lr_0 = 1.0165e-04
Loss = 3.0639e-01, PNorm = 63.7936, GNorm = 1.4342, lr_0 = 1.0158e-04
Loss = 3.6833e-01, PNorm = 63.7940, GNorm = 2.1076, lr_0 = 1.0151e-04
Loss = 3.3578e-01, PNorm = 63.7949, GNorm = 1.3119, lr_0 = 1.0144e-04
Loss = 3.7661e-01, PNorm = 63.7950, GNorm = 1.4572, lr_0 = 1.0137e-04
Loss = 3.1852e-01, PNorm = 63.7963, GNorm = 1.4847, lr_0 = 1.0130e-04
Loss = 3.6833e-01, PNorm = 63.7973, GNorm = 1.2003, lr_0 = 1.0123e-04
Loss = 2.8998e-01, PNorm = 63.7982, GNorm = 1.1659, lr_0 = 1.0116e-04
Loss = 2.9962e-01, PNorm = 63.7976, GNorm = 1.7013, lr_0 = 1.0110e-04
Loss = 3.1119e-01, PNorm = 63.7977, GNorm = 1.1614, lr_0 = 1.0103e-04
Loss = 2.9835e-01, PNorm = 63.7980, GNorm = 1.3821, lr_0 = 1.0096e-04
Loss = 3.0711e-01, PNorm = 63.7985, GNorm = 1.6737, lr_0 = 1.0089e-04
Loss = 3.4575e-01, PNorm = 63.7997, GNorm = 1.9145, lr_0 = 1.0082e-04
Loss = 3.6764e-01, PNorm = 63.7998, GNorm = 3.0171, lr_0 = 1.0075e-04
Loss = 3.5393e-01, PNorm = 63.7999, GNorm = 1.2654, lr_0 = 1.0068e-04
Loss = 3.3733e-01, PNorm = 63.8014, GNorm = 1.6360, lr_0 = 1.0061e-04
Loss = 3.8573e-01, PNorm = 63.8019, GNorm = 1.7424, lr_0 = 1.0054e-04
Loss = 4.0371e-01, PNorm = 63.8002, GNorm = 1.9697, lr_0 = 1.0047e-04
Loss = 3.4386e-01, PNorm = 63.8004, GNorm = 1.8919, lr_0 = 1.0041e-04
Loss = 3.5725e-01, PNorm = 63.8028, GNorm = 1.7067, lr_0 = 1.0034e-04
Loss = 3.4484e-01, PNorm = 63.8055, GNorm = 1.1413, lr_0 = 1.0027e-04
Loss = 3.7158e-01, PNorm = 63.8071, GNorm = 2.2458, lr_0 = 1.0020e-04
Loss = 3.8956e-01, PNorm = 63.8078, GNorm = 2.2276, lr_0 = 1.0013e-04
Loss = 3.7987e-01, PNorm = 63.8083, GNorm = 1.9582, lr_0 = 1.0006e-04
Loss = 2.9191e-01, PNorm = 63.8110, GNorm = 1.7781, lr_0 = 1.0000e-04
Validation mae = 0.110923
Model 0 best validation mae = 0.110796 on epoch 28
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.110349
Ensemble test mae = 0.110349
10-fold cross validation
	Seed 0 ==> test mae = 0.111068
	Seed 1 ==> test mae = 0.110959
	Seed 2 ==> test mae = 0.110745
	Seed 3 ==> test mae = 0.110720
	Seed 4 ==> test mae = 0.110406
	Seed 5 ==> test mae = 0.110571
	Seed 6 ==> test mae = 0.110407
	Seed 7 ==> test mae = 0.110007
	Seed 8 ==> test mae = 0.110286
	Seed 9 ==> test mae = 0.110349
Overall test mae = 0.110552 +/- 0.000308
Elapsed time = 5:23:54
